From 6ede6497ad2a63e5ff2b57abba827d0da469f923 Mon Sep 17 00:00:00 2001
From: default <nobody@localhost>
Date: Thu, 17 Aug 2023 17:38:39 +0200
Subject: [PATCH] Convert 'Link' attachments that have a media extension to
 something more useful.

---
 html.c       | 10 ++++++
 xs_mime.h    | 90 +++++++++++++++++++++++++++++-----------------------
 xs_regex.h   | 10 ++++--
 xs_unicode.h | 51 +++++++++++++++++++++++++----
 xs_version.h |  2 +-
 5 files changed, 112 insertions(+), 51 deletions(-)

diff --git a/html.c b/html.c
index 8e45eba..e132f17 100644
--- a/html.c
+++ b/html.c
@@ -1313,6 +1313,16 @@ xs_str *html_entry(snac *user, xs_str *os, const xs_dict *msg, int local,
             if (xs_is_null(url))
                 continue;
 
+            /* if it's a plain Link, check if it can be "rewritten" */
+            if (strcmp(t, "Link") == 0) {
+                const char *mt = xs_mime_by_ext(url);
+
+                if (xs_startswith(mt, "image/") ||
+                    xs_startswith(mt, "audio/") ||
+                    xs_startswith(mt, "video/"))
+                    t = mt;
+            }
+
             const char *name = xs_dict_get(v, "name");
             if (xs_is_null(name))
                 name = xs_dict_get(msg, "name");
diff --git a/xs_mime.h b/xs_mime.h
index ef7affe..2c8eaa9 100644
--- a/xs_mime.h
+++ b/xs_mime.h
@@ -6,57 +6,67 @@
 
 const char *xs_mime_by_ext(const char *file);
 
+extern const char *xs_mime_types[];
+
 #ifdef XS_IMPLEMENTATION
 
 /* intentionally brain-dead simple */
-struct _mime_info {
-    const char *type;
-    const char *ext;
-} mime_info[] = {
-    { "application/json",   ".json" },
-    { "image/gif",          ".gif" },
-    { "image/jpeg",         ".jpeg" },
-    { "image/jpeg",         ".jpg" },
-    { "image/png",          ".png" },
-    { "image/webp",         ".webp" },
-    { "video/mp4",          ".mp4" },
-    { "video/mp4",          ".mpg4" },
-    { "video/mp4",          ".m4v" },
-    { "video/webm",         ".webm" },
-    { "video/quicktime",    ".mov" },
-    { "video/3gpp",         ".3gp" },
-    { "video/ogg",          ".ogv" },
-    { "video/flv",          ".flv" },
-    { "audio/mp3",          ".mp3" },
-    { "audio/ogg",          ".ogg" },
-    { "audio/ogg",          ".oga" },
-    { "audio/ogg",          ".opus" },
-    { "audio/flac",         ".flac" },
-    { "audio/wav",          ".wav" },
-    { "audio/wma",          ".wma" },
-    { "audio/aac",          ".aac" },
-    { "audio/aac",          ".m4a" },
-    { "text/css",           ".css" },
-    { "text/html",          ".html" },
-    { "text/plain",         ".txt" },
-    { "text/xml",           ".xml" },
-    { "text/markdown",      ".md" },
-    { "text/gemini",        ".gmi" },
-    { NULL, NULL }
+/* CAUTION: keep sorted by extension (strcmp() order): xs_mime_by_ext() stops at the first greater key */
+
+const char *xs_mime_types[] = {     /* flat list of extension, MIME type pairs; ends with a NULL pair */
+    "3gp",      "video/3gpp",
+    "aac",      "audio/aac",
+    "css",      "text/css",
+    "flac",     "audio/flac",
+    "flv",      "video/flv",
+    "gif",      "image/gif",
+    "gmi",      "text/gemini",
+    "html",     "text/html",
+    "jpeg",     "image/jpeg",
+    "jpg",      "image/jpeg",
+    "json",     "application/json",
+    "m4a",      "audio/aac",
+    "m4v",      "video/mp4",
+    "md",       "text/markdown",
+    "mov",      "video/quicktime",
+    "mp3",      "audio/mp3",
+    "mp4",      "video/mp4",
+    "mpg4",     "video/mp4",
+    "oga",      "audio/ogg",
+    "ogg",      "audio/ogg",
+    "ogv",      "video/ogg",
+    "opus",     "audio/ogg",
+    "png",      "image/png",
+    "txt",      "text/plain",
+    "wav",      "audio/wav",
+    "webm",     "video/webm",
+    "webp",     "image/webp",
+    "wma",      "audio/wma",
+    "xml",      "text/xml",
+    NULL,       NULL,
 };
 
 
 const char *xs_mime_by_ext(const char *file)
 /* returns the MIME type by file extension */
 {
-    struct _mime_info *mi = mime_info;
-    xs *lfile = xs_tolower_i(xs_dup(file));
+    const char *ext = strrchr(file, '.');   /* last dot in the name, or NULL if there is none */
 
-    while (mi->type != NULL) {
-        if (xs_endswith(lfile, mi->ext))
-            return mi->type;
+    if (ext) {
+        const char **p = xs_mime_types;
+        xs *uext       = xs_tolower_i(xs_dup(ext + 1));     /* lowercased extension, without the dot */
 
-        mi++;
+        while (*p) {    /* the table ends with a NULL key: test the pointer itself, never dereference it */
+            int c;
+
+            if ((c = strcmp(*p, uext)) == 0)
+                return p[1];    /* the MIME type follows its extension in the table */
+            else
+            if (c > 0)
+                break;          /* table is sorted: no later key can match */
+
+            p += 2;
+        }
     }
 
     return "application/octet-stream";
diff --git a/xs_regex.h b/xs_regex.h
index 6fb6cca..7e1c80f 100644
--- a/xs_regex.h
+++ b/xs_regex.h
@@ -8,8 +8,10 @@ xs_list *xs_regex_split_n(const char *str, const char *rx, int count);
 #define xs_regex_split(str, rx) xs_regex_split_n(str, rx, XS_ALL)
 xs_list *xs_regex_match_n(const char *str, const char *rx, int count);
 #define xs_regex_match(str, rx) xs_regex_match_n(str, rx, XS_ALL)
-xs_list *xs_regex_replace_n(const char *str, const char *rx, const char *rep, int count);
-#define xs_regex_replace(str, rx, rep) xs_regex_replace_n(str, rx, rep, XS_ALL)
+xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count);  /* frees str before returning the result */
+#define xs_regex_replace_i(str, rx, rep) xs_regex_replace_in(str, rx, rep, XS_ALL)
+#define xs_regex_replace_n(str, rx, rep, count) xs_regex_replace_in(xs_dup(str), rx, rep, count)    /* works on an xs_dup() copy of str */
+#define xs_regex_replace(str, rx, rep) xs_regex_replace_in(xs_dup(str), rx, rep, XS_ALL)    /* works on an xs_dup() copy of str */
 
 #ifdef XS_IMPLEMENTATION
 
@@ -78,7 +80,7 @@ xs_list *xs_regex_match_n(const char *str, const char *rx, int count)
 }
 
 
-xs_list *xs_regex_replace_n(const char *str, const char *rx, const char *rep, int count)
+xs_list *xs_regex_replace_in(xs_str *str, const char *rx, const char *rep, int count)
 /* replaces all matches with the rep string. If it contains unescaped &,
    they are replaced with the match */
 {
@@ -121,6 +123,8 @@ xs_list *xs_regex_replace_n(const char *str, const char *rx, const char *rep, in
         n++;
     }
 
+    xs_free(str);
+
     return s;
 }
 
diff --git a/xs_unicode.h b/xs_unicode.h
index 48cd660..35cd9f7 100644
--- a/xs_unicode.h
+++ b/xs_unicode.h
@@ -4,8 +4,10 @@
 
 #define _XS_UNICODE_H
 
+ int _xs_utf8_enc(char buf[4], unsigned int cpoint);
  xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
  unsigned int xs_utf8_dec(char **str);
+ int xs_unicode_width(unsigned int cpoint);
  unsigned int *_xs_unicode_upper_search(unsigned int cpoint);
  unsigned int *_xs_unicode_lower_search(unsigned int cpoint);
  #define xs_unicode_is_upper(cpoint) (!!_xs_unicode_upper_search(cpoint))
@@ -18,8 +20,8 @@
 #ifdef XS_IMPLEMENTATION
 
 
-char *_xs_utf8_enc(char buf[4], unsigned int cpoint)
-/* encodes an Unicode codepoint to utf-8 into buf and returns the new position */
+int _xs_utf8_enc(char buf[4], unsigned int cpoint)
+/* encodes an Unicode codepoint to utf-8 into buf and returns the size in bytes */
 {
     unsigned char *p = (unsigned char *)buf;
 
@@ -42,18 +44,18 @@ char *_xs_utf8_enc(char buf[4], unsigned int cpoint)
         *p++ = 0x80 | (cpoint & 0x3f);
     }
 
-    return (char *)p;
+    return p - (unsigned char *)buf;
 }
 
 
 xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
 /* encodes an Unicode codepoint to utf-8 into str */
 {
-    char tmp[4], *p;
+    char tmp[4];
 
-    p = _xs_utf8_enc(tmp, cpoint);
+    int c = _xs_utf8_enc(tmp, cpoint);      /* size in bytes of the encoding stored in tmp */
 
-    return xs_append_m(str, tmp, p - tmp);
+    return xs_append_m(str, tmp, c);
 }
 
 
@@ -99,9 +101,44 @@ unsigned int xs_utf8_dec(char **str)
 }
 
 
+/* intentionally dead simple: triplets of first codepoint, last codepoint (inclusive), width; keep in ascending order */
+
+static unsigned int xs_unicode_width_table[] = {
+    0x300,      0x36f,      0,      /* diacritics */
+    0x1100,     0x11ff,     2,      /* Hangul */
+    0x2e80,     0xa4cf,     2,      /* CJK */
+    0xac00,     0xd7a3,     2,      /* more Hangul */
+    0xe000,     0xf8ff,     0,      /* private use */
+    0xf900,     0xfaff,     2,      /* CJK compatibility */
+    0xff00,     0xff60,     2,      /* full width things */
+    0xffdf,     0xffe6,     2,      /* full width things */
+    0x1f200,    0x1ffff,    2,      /* emojis */
+    0x20000,    0x2fffd,    2       /* more CJK */
+};
+
+int xs_unicode_width(unsigned int cpoint)
+/* returns the width in columns (0, 1 or 2) of a Unicode codepoint (somewhat simplified) */
+{
+    unsigned int *p = xs_unicode_width_table;
+    unsigned int *e = p + sizeof(xs_unicode_width_table) / sizeof(unsigned int);    /* one past the last table value */
+
+    while (p < e) {
+        if (cpoint < p[0])      /* before this range and not inside any earlier one: default width */
+            return 1;
+
+        if (cpoint >= p[0] && cpoint <= p[1])   /* inside the inclusive range p[0]..p[1] */
+            return p[2];
+
+        p += 3;     /* each table entry takes three values */
+    }
+
+    return 0;       /* above the last range in the table */
+}
+
+
 #ifdef _XS_UNICODE_TBL_H
 
-/* include xs_unicode_tbl.h before to use these functions */
+/* include xs_unicode_tbl.h before this one to use these functions */
 
 static int int_cmp(const void *p1, const void *p2)
 {
diff --git a/xs_version.h b/xs_version.h
index ae43ff4..8b2dea3 100644
--- a/xs_version.h
+++ b/xs_version.h
@@ -1 +1 @@
-/* b7e9713d90382d8da0b58023f4c78416e6ca1bc5 */
+/* e85f257dd8fcb2980fd21aa37c1594c1461ddf48 */