snac2/format.c

222 lines
5.5 KiB
C
Raw Normal View History

2022-10-07 16:30:54 +00:00
/* snac - A simple, minimalistic ActivityPub instance */
/* copyright (c) 2022 grunfink - MIT license */
#include "xs.h"
#include "xs_regex.h"
#include "snac.h"
/* emoticons, people laughing and such: table of text tokens and the
   string each one is replaced with; the list ends with a NULL key */
struct {
    const char *key;
    const char *value;
} smileys[] = {
    { ":-)",        "🙂" },
    { ":-D",        "😀" },
    { "X-D",        "😆" },
    { ";-)",        "😉" },
    { "B-)",        "😎" },
    { ":-(",        "😞" },
    { ":-*",        "😘" },
    { ":-/",        "😕" },
    { "8-o",        "😲" },
    { "%-)",        "🤪" },
    { ":_(",        "😢" },
    { ":-|",        "😐" },
    { "<3",         "&#128147;" },
    { ":facepalm:", "&#129318;" },
    { ":shrug:",    "&#129335;" },
    { ":eyeroll:",  "&#128580;" },
    { ":beer:",     "&#127866;" },
    { ":beers:",    "&#127867;" },
    { ":munch:",    "&#128561;" },
    { ":thumb:",    "&#128077;" },
    { NULL,         NULL }
};
2022-11-13 08:12:20 +00:00
static d_char *format_line(const char *line)
/* formats a line */
2022-10-07 16:30:54 +00:00
{
2022-11-13 08:12:20 +00:00
d_char *s = xs_str_new(NULL);
2022-10-07 16:30:54 +00:00
char *p, *v;
2022-11-13 08:12:20 +00:00
/* split by markup */
xs *sm = xs_regex_split(line,
"(`[^`]+`|\\*\\*?[^\\*]+\\*?\\*|https?:/" "/[^[:space:]]+)");
int n = 0;
2022-11-13 08:12:20 +00:00
p = sm;
while (xs_list_iter(&p, &v)) {
if ((n & 0x1)) {
/* markup */
if (xs_startswith(v, "`")) {
xs *s1 = xs_crop(xs_dup(v), 1, -1);
xs *s2 = xs_fmt("<code>%s</code>", s1);
s = xs_str_cat(s, s2);
2022-10-07 16:30:54 +00:00
}
else
2022-11-13 08:12:20 +00:00
if (xs_startswith(v, "**")) {
xs *s1 = xs_crop(xs_dup(v), 2, -2);
xs *s2 = xs_fmt("<b>%s</b>", s1);
s = xs_str_cat(s, s2);
}
else
if (xs_startswith(v, "*")) {
xs *s1 = xs_crop(xs_dup(v), 1, -1);
xs *s2 = xs_fmt("<i>%s</i>", s1);
s = xs_str_cat(s, s2);
}
else
if (xs_startswith(v, "http")) {
xs *s1 = xs_fmt("<a href=\"%s\" target=\"_blank\">%s</a>", v, v);
s = xs_str_cat(s, s1);
}
else
s = xs_str_cat(s, v);
2022-10-07 16:30:54 +00:00
}
2022-11-13 08:12:20 +00:00
else
/* surrounded text, copy directly */
s = xs_str_cat(s, v);
n++;
2022-10-07 16:30:54 +00:00
}
2022-11-13 08:12:20 +00:00
return s;
}
2022-10-07 16:30:54 +00:00
2022-11-13 08:12:20 +00:00
2022-11-16 12:13:31 +00:00
d_char *not_really_markdown(const char *content)
2022-11-13 08:12:20 +00:00
/* formats a content using some Markdown rules */
{
d_char *s = xs_str_new(NULL);
int in_pre = 0;
int in_blq = 0;
xs *list;
char *p, *v;
/* work by lines */
p = list = xs_split(content, "\n");
2022-10-07 16:30:54 +00:00
while (xs_list_iter(&p, &v)) {
2022-11-13 08:12:20 +00:00
xs *ss = NULL;
2022-10-07 16:30:54 +00:00
2022-11-13 08:12:20 +00:00
if (strcmp(v, "```") == 0) {
2022-10-07 16:30:54 +00:00
if (!in_pre)
s = xs_str_cat(s, "<pre>");
else
s = xs_str_cat(s, "</pre>");
in_pre = !in_pre;
continue;
}
2022-11-13 08:12:20 +00:00
if (in_pre)
ss = xs_dup(v);
else
ss = xs_strip(format_line(v));
2022-10-07 16:30:54 +00:00
if (xs_startswith(ss, ">")) {
/* delete the > and subsequent spaces */
ss = xs_strip(xs_crop(ss, 1, 0));
if (!in_blq) {
s = xs_str_cat(s, "<blockquote>");
in_blq = 1;
}
s = xs_str_cat(s, ss);
s = xs_str_cat(s, "<br>");
continue;
}
if (in_blq) {
s = xs_str_cat(s, "</blockquote>");
in_blq = 0;
}
s = xs_str_cat(s, ss);
s = xs_str_cat(s, "<br>");
}
if (in_blq)
s = xs_str_cat(s, "</blockquote>");
if (in_pre)
s = xs_str_cat(s, "</pre>");
/* some beauty fixes */
2022-11-13 08:12:20 +00:00
s = xs_replace_i(s, "<br><br><blockquote>", "<br><blockquote>");
2022-10-07 16:30:54 +00:00
s = xs_replace_i(s, "</blockquote><br>", "</blockquote>");
2022-11-01 18:49:35 +00:00
s = xs_replace_i(s, "</pre><br>", "</pre>");
2022-10-07 16:30:54 +00:00
{
/* traditional emoticons */
int n;
for (n = 0; smileys[n].key; n++)
s = xs_replace_i(s, smileys[n].key, smileys[n].value);
}
2022-11-13 07:41:50 +00:00
return s;
2022-10-07 16:30:54 +00:00
}
/* HTML tags that sanitize() lets through; the list ends with NULL */
const char *valid_tags[] = {
    "a", "p", "br", "br/", "blockquote", "ul", "li",
    "span", "i", "b", "pre", "code", "em", "strong", NULL
};
2022-11-16 12:13:31 +00:00
d_char *sanitize(const char *content)
/* cleans dangerous HTML output: tags listed in valid_tags are rebuilt
   keeping only their href, rel, class and target attributes; any
   other tag gets its < escaped so it is shown as text.
   Returns the cleaned content as a new string */
{
    d_char *s = xs_str_new(NULL);
    xs *sl = xs_regex_split(content, "</?[^>]+>");
    int n = 0;
    char *p, *v;

    p = sl;
    while (xs_list_iter(&p, &v)) {
        if (n & 0x1) {
            /* odd pieces are the tags matched by the regex */
            int closing = (v[1] == '/');

            /* take what is between the < (or </) and the >; the tag
               name is the part before the first space, lowercased */
            xs *s1  = xs_strip(xs_crop(xs_dup(v), closing ? 2 : 1, -1));
            xs *l1  = xs_split_n(s1, " ", 1);
            xs *tag = xs_tolower(xs_dup(xs_list_get(l1, 0)));
            xs *s2  = NULL;
            int i;

            /* check if it's one of the valid tags */
            for (i = 0; valid_tags[i]; i++) {
                if (strcmp(tag, valid_tags[i]) == 0)
                    break;
            }

            if (valid_tags[i]) {
                /* accepted tag: rebuild it with only the accepted elements */
                /* NOTE(review): attribute values are not inspected here,
                   so any href value (e.g. a javascript: URL) is kept as
                   is; confirm whether that is filtered elsewhere */
                xs *el = xs_regex_match(v, "(href|rel|class|target)=\"[^\"]*\"");
                xs *s3 = xs_join(el, " ");

                s2 = xs_fmt("<%s%s%s%s>",
                    closing ? "/" : "", tag, xs_list_len(el) ? " " : "", s3);
            }
            else {
                /* bad tag: escape it */
                s2 = xs_replace(v, "<", "&lt;");
            }

            s = xs_str_cat(s, s2);
        }
        else {
            /* non-tag */
            s = xs_str_cat(s, v);
        }

        n++;
    }

    return s;
}