Backport from xs.

This commit is contained in:
default 2023-12-27 12:54:38 +01:00
parent 94a6274a46
commit bf435af788
4 changed files with 253 additions and 134 deletions

140
xs_hex.h
View file

@ -4,32 +4,111 @@
#define _XS_HEX_H #define _XS_HEX_H
xs_str *xs_hex_enc(const xs_val *data, int size); int xs_is_hex_digit(char str);
xs_val *xs_hex_dec(const xs_str *hex, int *size); void xs_hex_enc_1(char **dst, const char **src);
int xs_is_hex(const char *str); int xs_hex_dec_1(char **dst, const char **src);
char *_xs_hex_enc(char *dst, const char *src, int src_size);
char *_xs_hex_dec(char *dst, const char *src, int src_size);
#ifdef _XS_H
xs_str *xs_hex_enc(const xs_val *data, int size);
xs_val *xs_hex_dec(const xs_str *hex, int *size);
int xs_is_hex(const char *str);
#endif /* _XS_H */
#ifdef XS_IMPLEMENTATION #ifdef XS_IMPLEMENTATION
#include <string.h>
/** hex **/ /** hex **/
static char rev_hex_digits[] = "fedcba9876543210FEDCBA"; static char rev_hex_digits[] = "fedcba9876543210FEDCBA";
int xs_is_hex_digit(char str)
/* checks if the char is an hex digit (0-9, a-f, A-F).
   Returns 1 if it is, 0 otherwise; the NUL character is
   never an hex digit */
{
    /* explicit range tests instead of strchr(): strchr() also
       matches the string terminator, so '\0' would be wrongly
       reported as a valid digit */
    return (str >= '0' && str <= '9') ||
           (str >= 'a' && str <= 'f') ||
           (str >= 'A' && str <= 'F');
}
void xs_hex_enc_1(char **dst, const char **src)
/* encodes the byte at *src as two lowercase hex digits stored
   at *dst. On return, *src has been advanced by one byte and
   *dst by two chars. No terminator is written */
{
    static const char digits[] = "0123456789abcdef";

    /* go through unsigned char so that bytes >= 0x80 are never
       shifted as negative values */
    unsigned char c = (unsigned char) **src;
    char *o = *dst;

    *o++ = digits[c >> 4];
    *o++ = digits[c & 0xf];

    *src = *src + 1;
    *dst = o;
}
static int _xs_hex_digit_value(char c)
/* returns the value (0-15) of an hex digit, or -1 if c is not one */
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;

    return -1;
}


int xs_hex_dec_1(char **dst, const char **src)
/* decodes the two hex digits at *src into one byte stored at *dst.
   On success returns 1, with *src advanced by two chars and *dst
   by one byte. On error (any of the two chars is not an hex digit,
   including the string terminator) returns 0 and leaves *src, *dst
   and the destination buffer untouched */
{
    const char *i = *src;
    int hi = _xs_hex_digit_value(i[0]);

    /* don't even look at the second char if the first one is
       invalid: it may lie beyond the string terminator */
    int lo = hi < 0 ? -1 : _xs_hex_digit_value(i[1]);

    if (hi < 0 || lo < 0) {
        /* decoding error */
        return 0;
    }

    **dst = (char) (hi << 4 | lo);

    *src = i + 2;
    *dst = *dst + 1;

    return 1;
}
char *_xs_hex_enc(char *dst, const char *src, int src_size)
/* writes the hex representation of the src_size bytes at src
   into dst, which must have enough size (two chars are stored
   per source byte). Returns the position in dst right after the
   last char written; no terminator is added here */
{
    const char *end;

    for (end = src + src_size; src < end; )
        xs_hex_enc_1(&dst, &src);

    return dst;
}
char *_xs_hex_dec(char *dst, const char *src, int src_size)
/* hex-decodes the src_size chars at src into dst, which must
   have enough size. Returns the final position of dst, or NULL
   if src_size is odd or a pair of digits cannot be decoded */
{
    const char *end;

    /* digits always come in pairs */
    if (src_size % 2 != 0)
        return NULL;

    for (end = src + src_size; src < end; ) {
        int ok = xs_hex_dec_1(&dst, &src);

        if (ok == 0)
            return NULL;
    }

    return dst;
}
#ifdef _XS_H
xs_str *xs_hex_enc(const xs_val *data, int size) xs_str *xs_hex_enc(const xs_val *data, int size)
/* returns an hexdump of data */ /* returns an hexdump of data */
{ {
xs_str *s; xs_str *s = xs_realloc(NULL, _xs_blk_size(size * 2 + 1));
char *p;
int n;
p = s = xs_realloc(NULL, _xs_blk_size(size * 2 + 1)); char *q = _xs_hex_enc(s, data, size);
for (n = 0; n < size; n++) { *q = '\0';
*p++ = rev_hex_digits[0xf - (*data >> 4 & 0xf)];
*p++ = rev_hex_digits[0xf - (*data & 0xf)];
data++;
}
*p = '\0';
return s; return s;
} }
@ -40,29 +119,14 @@ xs_val *xs_hex_dec(const xs_str *hex, int *size)
{ {
int sz = strlen(hex); int sz = strlen(hex);
xs_val *s = NULL; xs_val *s = NULL;
char *p;
int n;
if (sz % 2)
return NULL;
p = s = xs_realloc(NULL, _xs_blk_size(sz / 2 + 1));
for (n = 0; n < sz; n += 2) {
char *d1 = strchr(rev_hex_digits, *hex++);
char *d2 = strchr(rev_hex_digits, *hex++);
if (!d1 || !d2) {
/* decoding error */
return xs_free(s);
}
*p++ = (0xf - ((d1 - rev_hex_digits) & 0xf)) << 4 |
(0xf - ((d2 - rev_hex_digits) & 0xf));
}
*p = '\0';
*size = sz / 2; *size = sz / 2;
s = xs_realloc(NULL, _xs_blk_size(*size + 1));
if (!_xs_hex_dec(s, hex, sz))
return xs_free(s);
s[*size] = '\0';
return s; return s;
} }
@ -71,14 +135,18 @@ xs_val *xs_hex_dec(const xs_str *hex, int *size)
int xs_is_hex(const char *str) int xs_is_hex(const char *str)
/* returns 1 if str is an hex string */ /* returns 1 if str is an hex string */
{ {
if (strlen(str) % 2)
return 0;
while (*str) { while (*str) {
if (strchr(rev_hex_digits, *str++) == NULL) if (!xs_is_hex_digit(*str++))
return 0; return 0;
} }
return 1; return 1;
} }
#endif /* _XS_H */
#endif /* XS_IMPLEMENTATION */ #endif /* XS_IMPLEMENTATION */

View file

@ -7,9 +7,13 @@
int xs_socket_timeout(int s, double rto, double sto); int xs_socket_timeout(int s, double rto, double sto);
int xs_socket_server(const char *addr, const char *serv); int xs_socket_server(const char *addr, const char *serv);
FILE *xs_socket_accept(int rs); FILE *xs_socket_accept(int rs);
xs_str *xs_socket_peername(int s); int _xs_socket_peername(int s, char *buf, int buf_size);
int xs_socket_connect(const char *addr, const char *serv); int xs_socket_connect(const char *addr, const char *serv);
#ifdef _XS_H
xs_str *xs_socket_peername(int s);
#endif
#ifdef XS_IMPLEMENTATION #ifdef XS_IMPLEMENTATION
@ -17,6 +21,9 @@ int xs_socket_connect(const char *addr, const char *serv);
#include <netdb.h> #include <netdb.h>
#include <netinet/in.h> #include <netinet/in.h>
#include <arpa/inet.h> #include <arpa/inet.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
int xs_socket_timeout(int s, double rto, double sto) int xs_socket_timeout(int s, double rto, double sto)
@ -100,34 +107,28 @@ FILE *xs_socket_accept(int rs)
} }
xs_str *xs_socket_peername(int s) int _xs_socket_peername(int s, char *buf, int buf_size)
/* returns the remote address as a string */ /* fill the buffer with the socket peername */
{ {
xs_str *ip = NULL;
struct sockaddr_storage addr; struct sockaddr_storage addr;
socklen_t slen = sizeof(addr); socklen_t slen = sizeof(addr);
if (getpeername(s, (struct sockaddr *)&addr, &slen) != -1) {
char buf[1024];
const char *p = NULL; const char *p = NULL;
if (getpeername(s, (struct sockaddr *)&addr, &slen) != -1) {
if (addr.ss_family == AF_INET) { if (addr.ss_family == AF_INET) {
struct sockaddr_in *sa = (struct sockaddr_in *)&addr; struct sockaddr_in *sa = (struct sockaddr_in *)&addr;
p = inet_ntop(AF_INET, &sa->sin_addr, buf, sizeof(buf)); p = inet_ntop(AF_INET, &sa->sin_addr, buf, buf_size);
} }
else else
if (addr.ss_family == AF_INET6) { if (addr.ss_family == AF_INET6) {
struct sockaddr_in6 *sa = (struct sockaddr_in6 *)&addr; struct sockaddr_in6 *sa = (struct sockaddr_in6 *)&addr;
p = inet_ntop(AF_INET6, &sa->sin6_addr, buf, sizeof(buf)); p = inet_ntop(AF_INET6, &sa->sin6_addr, buf, buf_size);
}
} }
if (p != NULL) return p != NULL;
ip = xs_str_new(p);
}
return ip;
} }
@ -195,6 +196,22 @@ int xs_socket_connect(const char *addr, const char *serv)
} }
#ifdef _XS_H
xs_str *xs_socket_peername(int s)
/* returns the remote address of socket s as a string built with
   xs_str_new(), or NULL if _xs_socket_peername() reports failure */
{
    char buf[2028];

    if (!_xs_socket_peername(s, buf, sizeof(buf)))
        return NULL;

    return xs_str_new(buf);
}
#endif /* _XS_H */
#endif /* XS_IMPLEMENTATION */ #endif /* XS_IMPLEMENTATION */
#endif /* _XS_SOCKET_H */ #endif /* _XS_SOCKET_H */

View file

@ -5,7 +5,6 @@
#define _XS_UNICODE_H #define _XS_UNICODE_H
int _xs_utf8_enc(char buf[4], unsigned int cpoint); int _xs_utf8_enc(char buf[4], unsigned int cpoint);
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
unsigned int xs_utf8_dec(char **str); unsigned int xs_utf8_dec(char **str);
int xs_unicode_width(unsigned int cpoint); int xs_unicode_width(unsigned int cpoint);
int xs_is_surrogate(unsigned int cpoint); int xs_is_surrogate(unsigned int cpoint);
@ -21,13 +20,20 @@
int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint); int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint);
int xs_unicode_is_alpha(unsigned int cpoint); int xs_unicode_is_alpha(unsigned int cpoint);
#ifdef _XS_H
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
#endif
#ifdef XS_IMPLEMENTATION #ifdef XS_IMPLEMENTATION
#ifndef countof
/* number of elements of an array (only meaningful on true
   arrays, not on pointers) */
#define countof(a) (sizeof(a) / sizeof((a)[0]))
#endif
int _xs_utf8_enc(char buf[4], unsigned int cpoint) int _xs_utf8_enc(char buf[4], unsigned int cpoint)
/* encodes an Unicode codepoint to utf-8 into buf and returns the size in bytes */ /* encodes an Unicode codepoint to utf-8 into buf and returns the size in bytes */
{ {
unsigned char *p = (unsigned char *)buf; char *p = buf;
if (cpoint < 0x80) /* 1 byte char */ if (cpoint < 0x80) /* 1 byte char */
*p++ = cpoint & 0xff; *p++ = cpoint & 0xff;
@ -48,27 +54,16 @@ int _xs_utf8_enc(char buf[4], unsigned int cpoint)
*p++ = 0x80 | (cpoint & 0x3f); *p++ = 0x80 | (cpoint & 0x3f);
} }
return p - (unsigned char *)buf; return p - buf;
}
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
/* encodes an Unicode codepoint to utf-8 into str */
{
char tmp[4];
int c = _xs_utf8_enc(tmp, cpoint);
return xs_append_m(str, tmp, c);
} }
unsigned int xs_utf8_dec(char **str) unsigned int xs_utf8_dec(char **str)
/* decodes an utf-8 char inside str and updates the pointer */ /* decodes an utf-8 char inside str and updates the pointer */
{ {
unsigned char *p = (unsigned char *)*str; char *p = *str;
unsigned int cpoint = 0; unsigned int cpoint = 0;
int c = *p++; unsigned char c = *p++;
int cb = 0; int cb = 0;
if ((c & 0x80) == 0) { /* 1 byte char */ if ((c & 0x80) == 0) { /* 1 byte char */
@ -91,30 +86,19 @@ unsigned int xs_utf8_dec(char **str)
} }
/* process the continuation bytes */ /* process the continuation bytes */
while (cb--) { while (cb > 0 && *p && (*p & 0xc0) == 0x80)
if ((*p & 0xc0) == 0x80) cpoint |= (*p++ & 0x3f) << (--cb * 6);
cpoint |= (*p++ & 0x3f) << (cb * 6);
else {
cpoint = 0xfffd;
break;
}
}
*str = (char *)p; /* incomplete or broken? */
if (cb)
cpoint = 0xfffd;
*str = p;
return cpoint; return cpoint;
} }
static int int_range_cmp(const void *p1, const void *p2) /** Unicode character width: intentionally dead simple **/
{
const unsigned int *a = p1;
const unsigned int *b = p2;
return *a < b[0] ? -1 : *a > b[1] ? 1 : 0;
}
/* intentionally dead simple */
static unsigned int xs_unicode_width_table[] = { static unsigned int xs_unicode_width_table[] = {
0x300, 0x36f, 0, /* diacritics */ 0x300, 0x36f, 0, /* diacritics */
@ -132,12 +116,23 @@ static unsigned int xs_unicode_width_table[] = {
int xs_unicode_width(unsigned int cpoint) int xs_unicode_width(unsigned int cpoint)
/* returns the width in columns of a Unicode codepoint (somewhat simplified) */ /* returns the width in columns of a Unicode codepoint (somewhat simplified) */
{ {
unsigned int *r = bsearch(&cpoint, xs_unicode_width_table, int b = 0;
sizeof(xs_unicode_width_table) / (sizeof(unsigned int) * 3), int t = countof(xs_unicode_width_table) / 3 - 1;
sizeof(unsigned int) * 3,
int_range_cmp);
return r ? r[2] : 1; while (t >= b) {
int n = (b + t) / 2;
unsigned int *p = &xs_unicode_width_table[n * 3];
if (cpoint < p[0])
t = n - 1;
else
if (cpoint > p[1])
b = n + 1;
else
return p[2];
}
return 1;
} }
@ -167,53 +162,62 @@ unsigned int xs_surrogate_enc(unsigned int cpoint)
} }
#ifdef _XS_H
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
/* encodes the Unicode codepoint as utf-8, appends the resulting
   bytes to str and returns whatever xs_append_m() returns */
{
    char utf8[4];
    int size;

    /* encode into the temporary buffer first; size is the
       number of bytes that were stored there */
    size = _xs_utf8_enc(utf8, cpoint);

    return xs_append_m(str, utf8, size);
}
#endif /* _XS_H */
#ifdef _XS_UNICODE_TBL_H #ifdef _XS_UNICODE_TBL_H
/* include xs_unicode_tbl.h before this one to use these functions */ /* include xs_unicode_tbl.h before this one to use these functions */
static int int_cmp(const void *p1, const void *p2)
{
const unsigned int *a = p1;
const unsigned int *b = p2;
return *a < *b ? -1 : *a > *b ? 1 : 0;
}
unsigned int *_xs_unicode_upper_search(unsigned int cpoint) unsigned int *_xs_unicode_upper_search(unsigned int cpoint)
/* searches for an uppercase codepoint in the case fold table */ /* searches for an uppercase codepoint in the case fold table */
{ {
return bsearch(&cpoint, xs_unicode_case_fold_table, int b = 0;
sizeof(xs_unicode_case_fold_table) / (sizeof(unsigned int) * 2), int t = countof(xs_unicode_case_fold_table) / 2 + 1;
sizeof(unsigned int) * 2,
int_cmp);
}
while (t >= b) {
int n = (b + t) / 2;
unsigned int *p = &xs_unicode_case_fold_table[n * 2];
unsigned int *_xs_unicode_lower_search(unsigned int cpoint) if (cpoint < p[0])
/* searches for a lowercase codepoint in the case fold table */ t = n - 1;
{ else
unsigned int *p = xs_unicode_case_fold_table + 1; if (cpoint > p[0])
unsigned int *e = xs_unicode_case_fold_table + b = n + 1;
sizeof(xs_unicode_case_fold_table) / sizeof(unsigned int); else
while (p < e) {
if (cpoint == *p)
return p; return p;
p += 2;
} }
return NULL; return NULL;
} }
unsigned int xs_unicode_to_upper(unsigned int cpoint) unsigned int *_xs_unicode_lower_search(unsigned int cpoint)
/* returns the cpoint to uppercase */ /* searches for a lowercase codepoint in the case fold table */
{ {
unsigned int *p = _xs_unicode_lower_search(cpoint); unsigned int *p = xs_unicode_case_fold_table;
unsigned int *e = p + countof(xs_unicode_case_fold_table);
return p == NULL ? cpoint : p[-1]; while (p < e) {
if (cpoint == p[1])
return p;
p += 2;
}
return NULL;
} }
@ -226,20 +230,40 @@ unsigned int xs_unicode_to_lower(unsigned int cpoint)
} }
unsigned int xs_unicode_to_upper(unsigned int cpoint)
/* converts cpoint to uppercase: if _xs_unicode_lower_search()
   finds an entry for it, the first value of that entry is
   returned; otherwise cpoint is returned unchanged */
{
    unsigned int *entry = _xs_unicode_lower_search(cpoint);

    if (entry != NULL)
        cpoint = entry[0];

    return cpoint;
}
int xs_unicode_nfd(unsigned int cpoint, unsigned int *base, unsigned int *diac) int xs_unicode_nfd(unsigned int cpoint, unsigned int *base, unsigned int *diac)
/* applies unicode Normalization Form D */ /* applies unicode Normalization Form D */
{ {
unsigned int *r = bsearch(&cpoint, xs_unicode_nfd_table, int b = 0;
sizeof(xs_unicode_nfd_table) / (sizeof(unsigned int) * 3), int t = countof(xs_unicode_nfd_table) / 3 - 1;
sizeof(unsigned int) * 3,
int_cmp);
if (r != NULL) { while (t >= b) {
*base = r[1]; int n = (b + t) / 2;
*diac = r[2]; unsigned int *p = &xs_unicode_nfd_table[n * 3];
int c = cpoint - p[0];
if (c < 0)
t = n - 1;
else
if (c > 0)
b = n + 1;
else {
*base = p[1];
*diac = p[2];
return 1;
}
} }
return !!r; return 0;
} }
@ -247,8 +271,7 @@ int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint)
/* applies unicode Normalization Form C */ /* applies unicode Normalization Form C */
{ {
unsigned int *p = xs_unicode_nfd_table; unsigned int *p = xs_unicode_nfd_table;
unsigned int *e = xs_unicode_nfd_table + unsigned int *e = p + countof(xs_unicode_nfd_table);
sizeof(xs_unicode_nfd_table) / sizeof(unsigned int);
while (p < e) { while (p < e) {
if (p[1] == base && p[2] == diac) { if (p[1] == base && p[2] == diac) {
@ -266,12 +289,23 @@ int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint)
int xs_unicode_is_alpha(unsigned int cpoint) int xs_unicode_is_alpha(unsigned int cpoint)
/* checks if a codepoint is an alpha (i.e. a letter) */ /* checks if a codepoint is an alpha (i.e. a letter) */
{ {
unsigned int *r = bsearch(&cpoint, xs_unicode_alpha_table, int b = 0;
sizeof(xs_unicode_alpha_table) / (sizeof(unsigned int) * 2), int t = countof(xs_unicode_alpha_table) / 2 - 1;
sizeof(unsigned int) * 2,
int_range_cmp);
return !!r; while (t >= b) {
int n = (b + t) / 2;
unsigned int *p = &xs_unicode_alpha_table[n * 2];
if (cpoint < p[0])
t = n - 1;
else
if (cpoint > p[1])
b = n + 1;
else
return 1;
}
return 0;
} }

View file

@ -1 +1 @@
/* 3582ff265e19407df1d532eb1d90c372fe22ca62 2023-12-08T06:10:40+01:00 */ /* fd50c72456b717bb235eec8fe5f712da5f695f2b 2023-12-27T12:51:14+01:00 */