From 6ede6497ad2a63e5ff2b57abba827d0da469f923 Mon Sep 17 00:00:00 2001
From: default
Date: Thu, 17 Aug 2023 17:38:39 +0200
Subject: [PATCH] Convert 'Link' attachments that have a media extension to something more useful.

---
Review note: xs_mime_by_ext() walks xs_mime_types[], which is terminated by
NULL entries. The loop condition must therefore test `*p` (the key pointer),
not `**p` (its first character); otherwise any extension that sorts after
the last key ("xml") reaches the sentinel and dereferences NULL.

 html.c       | 10 ++++++
 xs_mime.h    | 90 +++++++++++++++++++++++++++++-----------------------
 xs_regex.h   | 10 ++++--
 xs_unicode.h | 51 +++++++++++++++++++++++++----
 xs_version.h |  2 +-
 5 files changed, 112 insertions(+), 51 deletions(-)

diff --git a/html.c b/html.c
index 8e45eba..e132f17 100644
--- a/html.c
+++ b/html.c
@@ -1313,6 +1313,16 @@ xs_str *html_entry(snac *user, xs_str *os, const xs_dict *msg, int local,
                 if (xs_is_null(url))
                     continue;
 
+                /* if it's a plain Link, check if it can be "rewritten" */
+                if (strcmp(t, "Link") == 0) {
+                    const char *mt = xs_mime_by_ext(url);
+
+                    if (xs_startswith(mt, "image/") ||
+                        xs_startswith(mt, "audio/") ||
+                        xs_startswith(mt, "video/"))
+                        t = mt;
+                }
+
                 const char *name = xs_dict_get(v, "name");
                 if (xs_is_null(name))
                     name = xs_dict_get(msg, "name");
diff --git a/xs_mime.h b/xs_mime.h
index ef7affe..2c8eaa9 100644
--- a/xs_mime.h
+++ b/xs_mime.h
@@ -6,57 +6,67 @@
 
 const char *xs_mime_by_ext(const char *file);
 
+extern const char *xs_mime_types[];
+
 #ifdef XS_IMPLEMENTATION
 
 /* intentionally brain-dead simple */
 
-struct _mime_info {
-    const char *type;
-    const char *ext;
-} mime_info[] = {
-    { "application/json",   ".json" },
-    { "image/gif",          ".gif" },
-    { "image/jpeg",         ".jpeg" },
-    { "image/jpeg",         ".jpg" },
-    { "image/png",          ".png" },
-    { "image/webp",         ".webp" },
-    { "video/mp4",          ".mp4" },
-    { "video/mp4",          ".mpg4" },
-    { "video/mp4",          ".m4v" },
-    { "video/webm",         ".webm" },
-    { "video/quicktime",    ".mov" },
-    { "video/3gpp",         ".3gp" },
-    { "video/ogg",          ".ogv" },
-    { "video/flv",          ".flv" },
-    { "audio/mp3",          ".mp3" },
-    { "audio/ogg",          ".ogg" },
-    { "audio/ogg",          ".oga" },
-    { "audio/ogg",          ".opus" },
-    { "audio/flac",         ".flac" },
-    { "audio/wav",          ".wav" },
-    { "audio/wma",          ".wma" },
-    { "audio/aac",          ".aac" },
-    { "audio/aac",          ".m4a" },
-    { "text/css",           ".css" },
-    { "text/html",          ".html" },
-    { "text/plain",         ".txt" },
-    { "text/xml",           ".xml" },
-    { "text/markdown",      ".md" },
-    { "text/gemini",        ".gmi" },
-    { NULL,                 NULL }
+/* CAUTION: sorted */
+
+const char *xs_mime_types[] = {
+    "3gp",      "video/3gpp",
+    "aac",      "audio/aac",
+    "css",      "text/css",
+    "flac",     "audio/flac",
+    "flv",      "video/flv",
+    "gif",      "image/gif",
+    "gmi",      "text/gemini",
+    "html",     "text/html",
+    "jpeg",     "image/jpeg",
+    "jpg",      "image/jpeg",
+    "json",     "application/json",
+    "m4a",      "audio/aac",
+    "m4v",      "video/mp4",
+    "md",       "text/markdown",
+    "mov",      "video/quicktime",
+    "mp3",      "audio/mp3",
+    "mp4",      "video/mp4",
+    "mpg4",     "video/mp4",
+    "oga",      "audio/ogg",
+    "ogg",      "audio/ogg",
+    "ogv",      "video/ogg",
+    "opus",     "audio/ogg",
+    "png",      "image/png",
+    "txt",      "text/plain",
+    "wav",      "audio/wav",
+    "webm",     "video/webm",
+    "webp",     "image/webp",
+    "wma",      "audio/wma",
+    "xml",      "text/xml",
+    NULL,       NULL,
 };
 
 const char *xs_mime_by_ext(const char *file)
 /* returns the MIME type by file extension */
 {
-    struct _mime_info *mi = mime_info;
-    xs *lfile = xs_tolower_i(xs_dup(file));
+    const char *ext = strrchr(file, '.');
 
-    while (mi->type != NULL) {
-        if (xs_endswith(lfile, mi->ext))
-            return mi->type;
+    if (ext) {
+        const char **p = xs_mime_types;
+        xs *uext = xs_tolower_i(xs_dup(ext + 1));
 
-        mi++;
+        while (*p) { /* stop at the NULL sentinel */
+            int c;
+
+            if ((c = strcmp(*p, uext)) == 0)
+                return p[1];
+            else
+            if (c > 0)
+                break;
+
+            p += 2;
+        }
     }
 
     return "application/octet-stream";
diff --git a/xs_regex.h b/xs_regex.h
index 6fb6cca..7e1c80f 100644
--- a/xs_regex.h
+++ b/xs_regex.h
@@ -8,8 +8,10 @@ xs_list *xs_regex_split_n(const char *str, const char *rx, int count);
 #define xs_regex_split(str, rx) xs_regex_split_n(str, rx, XS_ALL)
 xs_list *xs_regex_match_n(const char *str, const char *rx, int count);
 #define xs_regex_match(str, rx) xs_regex_match_n(str, rx, XS_ALL)
-xs_list *xs_regex_replace_n(const char *str, const char *rx, const char *rep, int count);
-#define xs_regex_replace(str, rx, rep) xs_regex_replace_n(str, rx, rep, XS_ALL)
+xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count);
+#define xs_regex_replace_i(str, rx, rep) xs_regex_replace_in(str, rx, rep, XS_ALL)
+#define xs_regex_replace_n(str, rx, rep, count) xs_regex_replace_in(xs_dup(str), rx, rep, count)
+#define xs_regex_replace(str, rx, rep) xs_regex_replace_in(xs_dup(str), rx, rep, XS_ALL)
 
 #ifdef XS_IMPLEMENTATION
 
@@ -78,7 +80,7 @@ xs_list *xs_regex_match_n(const char *str, const char *rx, int count)
 }
 
 
-xs_list *xs_regex_replace_n(const char *str, const char *rx, const char *rep, int count)
+xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count)
 /* replaces all matches with the rep string. If it contains unescaped &,
    they are replaced with the match */
 {
@@ -121,6 +123,8 @@ xs_list *xs_regex_replace_n(const char *str, const char *rx, const char *rep, in
         n++;
     }
 
+    xs_free(str);
+
     return s;
 }
 
diff --git a/xs_unicode.h b/xs_unicode.h
index 48cd660..35cd9f7 100644
--- a/xs_unicode.h
+++ b/xs_unicode.h
@@ -4,8 +4,10 @@
 
 #define _XS_UNICODE_H
 
+ int _xs_utf8_enc(char buf[4], unsigned int cpoint);
  xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
  unsigned int xs_utf8_dec(char **str);
+ int xs_unicode_width(unsigned int cpoint);
  unsigned int *_xs_unicode_upper_search(unsigned int cpoint);
  unsigned int *_xs_unicode_lower_search(unsigned int cpoint);
  #define xs_unicode_is_upper(cpoint) (!!_xs_unicode_upper_search(cpoint))
@@ -18,8 +20,8 @@
 
 #ifdef XS_IMPLEMENTATION
 
-char *_xs_utf8_enc(char buf[4], unsigned int cpoint)
-/* encodes an Unicode codepoint to utf-8 into buf and returns the new position */
+int _xs_utf8_enc(char buf[4], unsigned int cpoint)
+/* encodes an Unicode codepoint to utf-8 into buf and returns the size in bytes */
 {
     unsigned char *p = (unsigned char *)buf;
 
@@ -42,18 +44,18 @@ char *_xs_utf8_enc(char buf[4], unsigned int cpoint)
         *p++ = 0x80 | (cpoint & 0x3f);
     }
 
-    return (char *)p;
+    return p - (unsigned char *)buf;
 }
 
 
 xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
 /* encodes an Unicode codepoint to utf-8 into str */
 {
-    char tmp[4], *p;
+    char tmp[4];
 
-    p = _xs_utf8_enc(tmp, cpoint);
+    int c = _xs_utf8_enc(tmp, cpoint);
 
-    return xs_append_m(str, tmp, p - tmp);
+    return xs_append_m(str, tmp, c);
 }
 
 
@@ -99,9 +101,44 @@ unsigned int xs_utf8_dec(char **str)
 }
 
 
+/* intentionally dead simple */
+
+static unsigned int xs_unicode_width_table[] = {
+    0x300,      0x36f,      0,      /* diacritics */
+    0x1100,     0x11ff,     2,      /* Hangul */
+    0x2e80,     0xa4cf,     2,      /* CJK */
+    0xac00,     0xd7a3,     2,      /* more Hangul */
+    0xe000,     0xf8ff,     0,      /* private use */
+    0xf900,     0xfaff,     2,      /* CJK compatibility */
+    0xff00,     0xff60,     2,      /* full width things */
+    0xffdf,     0xffe6,     2,      /* full width things */
+    0x1f200,    0x1ffff,    2,      /* emojis */
+    0x20000,    0x2fffd,    2       /* more CJK */
+};
+
+int xs_unicode_width(unsigned int cpoint)
+/* returns the width in columns of a Unicode codepoint (somewhat simplified) */
+{
+    unsigned int *p = xs_unicode_width_table;
+    unsigned int *e = p + sizeof(xs_unicode_width_table) / sizeof(unsigned int);
+
+    while (p < e) {
+        if (cpoint < p[0])
+            return 1;
+
+        if (cpoint >= p[0] && cpoint <= p[1])
+            return p[2];
+
+        p += 3;
+    }
+
+    return 0;
+}
+
+
 #ifdef _XS_UNICODE_TBL_H
 
-/* include xs_unicode_tbl.h before to use these functions */
+/* include xs_unicode_tbl.h before this one to use these functions */
 
 static int int_cmp(const void *p1, const void *p2)
 {
diff --git a/xs_version.h b/xs_version.h
index ae43ff4..8b2dea3 100644
--- a/xs_version.h
+++ b/xs_version.h
@@ -1 +1 @@
-/* b7e9713d90382d8da0b58023f4c78416e6ca1bc5 */
+/* e85f257dd8fcb2980fd21aa37c1594c1461ddf48 */