mirror of
https://codeberg.org/grunfink/snac2.git
synced 2024-11-22 05:15:04 +00:00
Backport from xs.
This commit is contained in:
parent
94a6274a46
commit
bf435af788
4 changed files with 253 additions and 134 deletions
140
xs_hex.h
140
xs_hex.h
|
@ -4,32 +4,111 @@
|
|||
|
||||
#define _XS_HEX_H
|
||||
|
||||
xs_str *xs_hex_enc(const xs_val *data, int size);
|
||||
xs_val *xs_hex_dec(const xs_str *hex, int *size);
|
||||
int xs_is_hex(const char *str);
|
||||
int xs_is_hex_digit(char str);
|
||||
void xs_hex_enc_1(char **dst, const char **src);
|
||||
int xs_hex_dec_1(char **dst, const char **src);
|
||||
char *_xs_hex_enc(char *dst, const char *src, int src_size);
|
||||
char *_xs_hex_dec(char *dst, const char *src, int src_size);
|
||||
|
||||
#ifdef _XS_H
|
||||
xs_str *xs_hex_enc(const xs_val *data, int size);
|
||||
xs_val *xs_hex_dec(const xs_str *hex, int *size);
|
||||
int xs_is_hex(const char *str);
|
||||
#endif /* _XS_H */
|
||||
|
||||
|
||||
#ifdef XS_IMPLEMENTATION
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/** hex **/
|
||||
|
||||
/* hex digits stored in reverse order, so that (0xf - nibble) is the index
   of the digit for that nibble; the trailing uppercase run lets strchr()
   find 'A'-'F' too, as their offsets masked with 0xf map to the same
   nibbles as their lowercase counterparts */
static char rev_hex_digits[] = "fedcba9876543210FEDCBA";

int xs_is_hex_digit(char str)
/* checks if the char is a hex digit (0-9, a-f, A-F); returns 1 or 0 */
{
    /* strchr() treats the string terminator as part of the string, so
       '\0' would be reported as found; reject it explicitly */
    return str != '\0' && strchr(rev_hex_digits, str) != NULL;
}
|
||||
|
||||
|
||||
void xs_hex_enc_1(char **dst, const char **src)
|
||||
/* decodes one character into two hex digits */
|
||||
{
|
||||
const char *i = *src;
|
||||
char *o = *dst;
|
||||
|
||||
*o++ = rev_hex_digits[0xf - (*i >> 4 & 0xf)];
|
||||
*o++ = rev_hex_digits[0xf - (*i & 0xf)];
|
||||
|
||||
*src = i + 1;
|
||||
*dst = o;
|
||||
}
|
||||
|
||||
|
||||
static int _xs_hex_nibble(char c)
/* returns the value (0-15) of a hex digit, or -1 if c is not one */
{
    if (c >= '0' && c <= '9')
        return c - '0';

    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;

    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;

    return -1;
}


int xs_hex_dec_1(char **dst, const char **src)
/* decodes the two hex digits at *src into one byte stored at *dst.
   On success returns 1 and advances *src by two and *dst by one;
   on error returns 0 and leaves both pointers untouched */
{
    const char *i = *src;
    char *o = *dst;

    /* test the first digit before looking at the second one, so that a
       string ending here is never read past its terminator */
    int hi = _xs_hex_nibble(i[0]);
    int lo = hi < 0 ? -1 : _xs_hex_nibble(i[1]);

    if (hi < 0 || lo < 0) {
        /* decoding error ('\0' is not a valid digit either) */
        return 0;
    }

    *o++ = (char)(hi << 4 | lo);

    *src = i + 2;
    *dst = o;

    return 1;
}
|
||||
|
||||
|
||||
char *_xs_hex_enc(char *dst, const char *src, int src_size)
/* writes the hex encoding of the src_size bytes at src into dst (the
   caller must provide room for src_size * 2 chars; no terminator is
   added) and returns the position in dst right after the last digit */
{
    /* xs_hex_enc_1() consumes exactly one source byte per call */
    for (int n = 0; n < src_size; n++)
        xs_hex_enc_1(&dst, &src);

    return dst;
}
|
||||
|
||||
|
||||
char *_xs_hex_dec(char *dst, const char *src, int src_size)
/* hex-decodes the src_size chars at src into dst, which must be big
   enough (src_size / 2 bytes). Returns the position in dst right after
   the last decoded byte, or NULL on decoding errors (odd size or
   invalid digits) */
{
    /* every output byte needs a full pair of digits */
    if (src_size % 2 != 0)
        return NULL;

    /* xs_hex_dec_1() consumes exactly two chars per successful call */
    for (int n = 0; n < src_size; n += 2) {
        if (xs_hex_dec_1(&dst, &src) == 0)
            return NULL;
    }

    return dst;
}
|
||||
|
||||
|
||||
#ifdef _XS_H
|
||||
|
||||
xs_str *xs_hex_enc(const xs_val *data, int size)
|
||||
/* returns a hex dump of data */
|
||||
{
|
||||
xs_str *s;
|
||||
char *p;
|
||||
int n;
|
||||
xs_str *s = xs_realloc(NULL, _xs_blk_size(size * 2 + 1));
|
||||
|
||||
p = s = xs_realloc(NULL, _xs_blk_size(size * 2 + 1));
|
||||
char *q = _xs_hex_enc(s, data, size);
|
||||
|
||||
for (n = 0; n < size; n++) {
|
||||
*p++ = rev_hex_digits[0xf - (*data >> 4 & 0xf)];
|
||||
*p++ = rev_hex_digits[0xf - (*data & 0xf)];
|
||||
data++;
|
||||
}
|
||||
|
||||
*p = '\0';
|
||||
*q = '\0';
|
||||
|
||||
return s;
|
||||
}
|
||||
|
@ -40,29 +119,14 @@ xs_val *xs_hex_dec(const xs_str *hex, int *size)
|
|||
{
|
||||
int sz = strlen(hex);
|
||||
xs_val *s = NULL;
|
||||
char *p;
|
||||
int n;
|
||||
|
||||
if (sz % 2)
|
||||
return NULL;
|
||||
|
||||
p = s = xs_realloc(NULL, _xs_blk_size(sz / 2 + 1));
|
||||
|
||||
for (n = 0; n < sz; n += 2) {
|
||||
char *d1 = strchr(rev_hex_digits, *hex++);
|
||||
char *d2 = strchr(rev_hex_digits, *hex++);
|
||||
|
||||
if (!d1 || !d2) {
|
||||
/* decoding error */
|
||||
return xs_free(s);
|
||||
}
|
||||
|
||||
*p++ = (0xf - ((d1 - rev_hex_digits) & 0xf)) << 4 |
|
||||
(0xf - ((d2 - rev_hex_digits) & 0xf));
|
||||
}
|
||||
|
||||
*p = '\0';
|
||||
*size = sz / 2;
|
||||
s = xs_realloc(NULL, _xs_blk_size(*size + 1));
|
||||
|
||||
if (!_xs_hex_dec(s, hex, sz))
|
||||
return xs_free(s);
|
||||
|
||||
s[*size] = '\0';
|
||||
|
||||
return s;
|
||||
}
|
||||
|
@ -71,14 +135,18 @@ xs_val *xs_hex_dec(const xs_str *hex, int *size)
|
|||
int xs_is_hex(const char *str)
|
||||
/* returns 1 if str is a hex string */
|
||||
{
|
||||
if (strlen(str) % 2)
|
||||
return 0;
|
||||
|
||||
while (*str) {
|
||||
if (strchr(rev_hex_digits, *str++) == NULL)
|
||||
if (!xs_is_hex_digit(*str++))
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif /* _XS_H */
|
||||
|
||||
#endif /* XS_IMPLEMENTATION */
|
||||
|
||||
|
|
43
xs_socket.h
43
xs_socket.h
|
@ -7,9 +7,13 @@
|
|||
int xs_socket_timeout(int s, double rto, double sto);
|
||||
int xs_socket_server(const char *addr, const char *serv);
|
||||
FILE *xs_socket_accept(int rs);
|
||||
xs_str *xs_socket_peername(int s);
|
||||
int _xs_socket_peername(int s, char *buf, int buf_size);
|
||||
int xs_socket_connect(const char *addr, const char *serv);
|
||||
|
||||
#ifdef _XS_H
|
||||
xs_str *xs_socket_peername(int s);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XS_IMPLEMENTATION
|
||||
|
||||
|
@ -17,6 +21,9 @@ int xs_socket_connect(const char *addr, const char *serv);
|
|||
#include <netdb.h>
|
||||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
|
||||
int xs_socket_timeout(int s, double rto, double sto)
|
||||
|
@ -100,34 +107,28 @@ FILE *xs_socket_accept(int rs)
|
|||
}
|
||||
|
||||
|
||||
xs_str *xs_socket_peername(int s)
|
||||
/* returns the remote address as a string */
|
||||
int _xs_socket_peername(int s, char *buf, int buf_size)
|
||||
/* fill the buffer with the socket peername */
|
||||
{
|
||||
xs_str *ip = NULL;
|
||||
struct sockaddr_storage addr;
|
||||
socklen_t slen = sizeof(addr);
|
||||
const char *p = NULL;
|
||||
|
||||
if (getpeername(s, (struct sockaddr *)&addr, &slen) != -1) {
|
||||
char buf[1024];
|
||||
const char *p = NULL;
|
||||
|
||||
if (addr.ss_family == AF_INET) {
|
||||
struct sockaddr_in *sa = (struct sockaddr_in *)&addr;
|
||||
|
||||
p = inet_ntop(AF_INET, &sa->sin_addr, buf, sizeof(buf));
|
||||
p = inet_ntop(AF_INET, &sa->sin_addr, buf, buf_size);
|
||||
}
|
||||
else
|
||||
if (addr.ss_family == AF_INET6) {
|
||||
struct sockaddr_in6 *sa = (struct sockaddr_in6 *)&addr;
|
||||
|
||||
p = inet_ntop(AF_INET6, &sa->sin6_addr, buf, sizeof(buf));
|
||||
p = inet_ntop(AF_INET6, &sa->sin6_addr, buf, buf_size);
|
||||
}
|
||||
|
||||
if (p != NULL)
|
||||
ip = xs_str_new(p);
|
||||
}
|
||||
|
||||
return ip;
|
||||
return p != NULL;
|
||||
}
|
||||
|
||||
|
||||
|
@ -195,6 +196,22 @@ int xs_socket_connect(const char *addr, const char *serv)
|
|||
}
|
||||
|
||||
|
||||
#ifdef _XS_H
|
||||
|
||||
xs_str *xs_socket_peername(int s)
/* returns the remote address of socket s as a string built with
   xs_str_new(), or NULL if _xs_socket_peername() reports failure */
{
    char addr[2028];

    if (!_xs_socket_peername(s, addr, sizeof(addr)))
        return NULL;

    return xs_str_new(addr);
}
|
||||
|
||||
#endif /* _XS_H */
|
||||
|
||||
#endif /* XS_IMPLEMENTATION */
|
||||
|
||||
#endif /* _XS_SOCKET_H */
|
||||
|
|
202
xs_unicode.h
202
xs_unicode.h
|
@ -5,7 +5,6 @@
|
|||
#define _XS_UNICODE_H
|
||||
|
||||
int _xs_utf8_enc(char buf[4], unsigned int cpoint);
|
||||
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
|
||||
unsigned int xs_utf8_dec(char **str);
|
||||
int xs_unicode_width(unsigned int cpoint);
|
||||
int xs_is_surrogate(unsigned int cpoint);
|
||||
|
@ -21,13 +20,20 @@
|
|||
int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint);
|
||||
int xs_unicode_is_alpha(unsigned int cpoint);
|
||||
|
||||
#ifdef _XS_H
|
||||
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
|
||||
#endif
|
||||
|
||||
#ifdef XS_IMPLEMENTATION
|
||||
|
||||
#ifndef countof
|
||||
#define countof(a) (sizeof((a)) / sizeof((*a)))
|
||||
#endif
|
||||
|
||||
int _xs_utf8_enc(char buf[4], unsigned int cpoint)
|
||||
/* encodes a Unicode codepoint as utf-8 into buf and returns the size in bytes */
|
||||
{
|
||||
unsigned char *p = (unsigned char *)buf;
|
||||
char *p = buf;
|
||||
|
||||
if (cpoint < 0x80) /* 1 byte char */
|
||||
*p++ = cpoint & 0xff;
|
||||
|
@ -48,27 +54,16 @@ int _xs_utf8_enc(char buf[4], unsigned int cpoint)
|
|||
*p++ = 0x80 | (cpoint & 0x3f);
|
||||
}
|
||||
|
||||
return p - (unsigned char *)buf;
|
||||
}
|
||||
|
||||
|
||||
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
|
||||
/* encodes an Unicode codepoint to utf-8 into str */
|
||||
{
|
||||
char tmp[4];
|
||||
|
||||
int c = _xs_utf8_enc(tmp, cpoint);
|
||||
|
||||
return xs_append_m(str, tmp, c);
|
||||
return p - buf;
|
||||
}
|
||||
|
||||
|
||||
unsigned int xs_utf8_dec(char **str)
|
||||
/* decodes a utf-8 char inside str and updates the pointer */
|
||||
{
|
||||
unsigned char *p = (unsigned char *)*str;
|
||||
char *p = *str;
|
||||
unsigned int cpoint = 0;
|
||||
int c = *p++;
|
||||
unsigned char c = *p++;
|
||||
int cb = 0;
|
||||
|
||||
if ((c & 0x80) == 0) { /* 1 byte char */
|
||||
|
@ -91,30 +86,19 @@ unsigned int xs_utf8_dec(char **str)
|
|||
}
|
||||
|
||||
/* process the continuation bytes */
|
||||
while (cb--) {
|
||||
if ((*p & 0xc0) == 0x80)
|
||||
cpoint |= (*p++ & 0x3f) << (cb * 6);
|
||||
else {
|
||||
cpoint = 0xfffd;
|
||||
break;
|
||||
}
|
||||
}
|
||||
while (cb > 0 && *p && (*p & 0xc0) == 0x80)
|
||||
cpoint |= (*p++ & 0x3f) << (--cb * 6);
|
||||
|
||||
*str = (char *)p;
|
||||
/* incomplete or broken? */
|
||||
if (cb)
|
||||
cpoint = 0xfffd;
|
||||
|
||||
*str = p;
|
||||
return cpoint;
|
||||
}
|
||||
|
||||
|
||||
static int int_range_cmp(const void *p1, const void *p2)
/* bsearch() comparator: p1 points to the codepoint being searched, p2 to
   a table row whose first two values are the inclusive range bounds */
{
    const unsigned int key = *(const unsigned int *)p1;
    const unsigned int *range = p2;

    if (key < range[0])
        return -1;

    if (key > range[1])
        return 1;

    return 0;
}
|
||||
|
||||
|
||||
/* intentionally dead simple */
|
||||
/** Unicode character width: intentionally dead simple **/
|
||||
|
||||
static unsigned int xs_unicode_width_table[] = {
|
||||
0x300, 0x36f, 0, /* diacritics */
|
||||
|
@ -132,12 +116,23 @@ static unsigned int xs_unicode_width_table[] = {
|
|||
int xs_unicode_width(unsigned int cpoint)
|
||||
/* returns the width in columns of a Unicode codepoint (somewhat simplified) */
|
||||
{
|
||||
unsigned int *r = bsearch(&cpoint, xs_unicode_width_table,
|
||||
sizeof(xs_unicode_width_table) / (sizeof(unsigned int) * 3),
|
||||
sizeof(unsigned int) * 3,
|
||||
int_range_cmp);
|
||||
int b = 0;
|
||||
int t = countof(xs_unicode_width_table) / 3 - 1;
|
||||
|
||||
return r ? r[2] : 1;
|
||||
while (t >= b) {
|
||||
int n = (b + t) / 2;
|
||||
unsigned int *p = &xs_unicode_width_table[n * 3];
|
||||
|
||||
if (cpoint < p[0])
|
||||
t = n - 1;
|
||||
else
|
||||
if (cpoint > p[1])
|
||||
b = n + 1;
|
||||
else
|
||||
return p[2];
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
|
@ -167,53 +162,62 @@ unsigned int xs_surrogate_enc(unsigned int cpoint)
|
|||
}
|
||||
|
||||
|
||||
#ifdef _XS_H
|
||||
|
||||
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
/* encodes the Unicode codepoint cpoint as utf-8 with _xs_utf8_enc() and
   passes the resulting bytes to xs_append_m() to add them to str;
   returns whatever xs_append_m() returns */
{
    char utf8[4];
    int size = _xs_utf8_enc(utf8, cpoint);

    return xs_append_m(str, utf8, size);
}
|
||||
|
||||
#endif /* _XS_H */
|
||||
|
||||
|
||||
#ifdef _XS_UNICODE_TBL_H
|
||||
|
||||
/* include xs_unicode_tbl.h before this one to use these functions */
|
||||
|
||||
static int int_cmp(const void *p1, const void *p2)
/* bsearch() comparator for two unsigned ints: returns -1, 0 or 1 */
{
    const unsigned int x = *(const unsigned int *)p1;
    const unsigned int y = *(const unsigned int *)p2;

    /* each comparison yields 0 or 1, so the difference is the sign */
    return (x > y) - (x < y);
}
|
||||
|
||||
|
||||
unsigned int *_xs_unicode_upper_search(unsigned int cpoint)
|
||||
/* searches for an uppercase codepoint in the case fold table */
|
||||
{
|
||||
return bsearch(&cpoint, xs_unicode_case_fold_table,
|
||||
sizeof(xs_unicode_case_fold_table) / (sizeof(unsigned int) * 2),
|
||||
sizeof(unsigned int) * 2,
|
||||
int_cmp);
|
||||
}
|
||||
int b = 0;
|
||||
int t = countof(xs_unicode_case_fold_table) / 2 + 1;
|
||||
|
||||
while (t >= b) {
|
||||
int n = (b + t) / 2;
|
||||
unsigned int *p = &xs_unicode_case_fold_table[n * 2];
|
||||
|
||||
unsigned int *_xs_unicode_lower_search(unsigned int cpoint)
|
||||
/* searches for a lowercase codepoint in the case fold table */
|
||||
{
|
||||
unsigned int *p = xs_unicode_case_fold_table + 1;
|
||||
unsigned int *e = xs_unicode_case_fold_table +
|
||||
sizeof(xs_unicode_case_fold_table) / sizeof(unsigned int);
|
||||
|
||||
while (p < e) {
|
||||
if (cpoint == *p)
|
||||
if (cpoint < p[0])
|
||||
t = n - 1;
|
||||
else
|
||||
if (cpoint > p[0])
|
||||
b = n + 1;
|
||||
else
|
||||
return p;
|
||||
|
||||
p += 2;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
unsigned int xs_unicode_to_upper(unsigned int cpoint)
|
||||
/* returns the cpoint to uppercase */
|
||||
unsigned int *_xs_unicode_lower_search(unsigned int cpoint)
|
||||
/* searches for a lowercase codepoint in the case fold table */
|
||||
{
|
||||
unsigned int *p = _xs_unicode_lower_search(cpoint);
|
||||
unsigned int *p = xs_unicode_case_fold_table;
|
||||
unsigned int *e = p + countof(xs_unicode_case_fold_table);
|
||||
|
||||
return p == NULL ? cpoint : p[-1];
|
||||
while (p < e) {
|
||||
if (cpoint == p[1])
|
||||
return p;
|
||||
|
||||
p += 2;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
@ -226,20 +230,40 @@ unsigned int xs_unicode_to_lower(unsigned int cpoint)
|
|||
}
|
||||
|
||||
|
||||
unsigned int xs_unicode_to_upper(unsigned int cpoint)
/* returns the uppercase version of cpoint, or cpoint itself when
   _xs_unicode_lower_search() does not find it in the case fold table */
{
    unsigned int *pair = _xs_unicode_lower_search(cpoint);

    /* the first value of the matching row is returned as the result */
    if (pair != NULL)
        return pair[0];

    return cpoint;
}
|
||||
|
||||
|
||||
int xs_unicode_nfd(unsigned int cpoint, unsigned int *base, unsigned int *diac)
|
||||
/* applies unicode Normalization Form D */
|
||||
{
|
||||
unsigned int *r = bsearch(&cpoint, xs_unicode_nfd_table,
|
||||
sizeof(xs_unicode_nfd_table) / (sizeof(unsigned int) * 3),
|
||||
sizeof(unsigned int) * 3,
|
||||
int_cmp);
|
||||
int b = 0;
|
||||
int t = countof(xs_unicode_nfd_table) / 3 - 1;
|
||||
|
||||
if (r != NULL) {
|
||||
*base = r[1];
|
||||
*diac = r[2];
|
||||
while (t >= b) {
|
||||
int n = (b + t) / 2;
|
||||
unsigned int *p = &xs_unicode_nfd_table[n * 3];
|
||||
|
||||
int c = cpoint - p[0];
|
||||
|
||||
if (c < 0)
|
||||
t = n - 1;
|
||||
else
|
||||
if (c > 0)
|
||||
b = n + 1;
|
||||
else {
|
||||
*base = p[1];
|
||||
*diac = p[2];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return !!r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -247,8 +271,7 @@ int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint)
|
|||
/* applies unicode Normalization Form C */
|
||||
{
|
||||
unsigned int *p = xs_unicode_nfd_table;
|
||||
unsigned int *e = xs_unicode_nfd_table +
|
||||
sizeof(xs_unicode_nfd_table) / sizeof(unsigned int);
|
||||
unsigned int *e = p + countof(xs_unicode_nfd_table);
|
||||
|
||||
while (p < e) {
|
||||
if (p[1] == base && p[2] == diac) {
|
||||
|
@ -266,12 +289,23 @@ int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint)
|
|||
int xs_unicode_is_alpha(unsigned int cpoint)
|
||||
/* checks if a codepoint is an alpha (i.e. a letter) */
|
||||
{
|
||||
unsigned int *r = bsearch(&cpoint, xs_unicode_alpha_table,
|
||||
sizeof(xs_unicode_alpha_table) / (sizeof(unsigned int) * 2),
|
||||
sizeof(unsigned int) * 2,
|
||||
int_range_cmp);
|
||||
int b = 0;
|
||||
int t = countof(xs_unicode_alpha_table) / 2 - 1;
|
||||
|
||||
return !!r;
|
||||
while (t >= b) {
|
||||
int n = (b + t) / 2;
|
||||
unsigned int *p = &xs_unicode_alpha_table[n * 2];
|
||||
|
||||
if (cpoint < p[0])
|
||||
t = n - 1;
|
||||
else
|
||||
if (cpoint > p[1])
|
||||
b = n + 1;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1 +1 @@
|
|||
/* 3582ff265e19407df1d532eb1d90c372fe22ca62 2023-12-08T06:10:40+01:00 */
|
||||
/* fd50c72456b717bb235eec8fe5f712da5f695f2b 2023-12-27T12:51:14+01:00 */
|
||||
|
|
Loading…
Reference in a new issue