/* snac - A simple, minimalistic ActivityPub instance */
/* copyright (c) 2022 grunfink - MIT license */

#include "xs.h"
#include "xs_regex.h"

#include "snac.h"

/*
 * NOTE(review): the HTML string literals in this file (<code>, <b>, <i>,
 * <a href>, <pre>, <blockquote>, <br>, the tag-matching regex and the
 * "&lt;" escape) were reconstructed from the surrounding logic; confirm
 * them against the upstream sources.
 */

/* emoticons, people laughing and such */

/* table of text shortcuts and the string each one is replaced with;
   terminated by an entry with a NULL key */
struct {
    const char *key;
    const char *value;
} smileys[] = {
    { ":-)",        "🙂" },
    { ":-D",        "😀" },
    { "X-D",        "😆" },
    { ";-)",        "😉" },
    { "B-)",        "😎" },
    { ":-(",        "😞" },
    { ":-*",        "😘" },
    { ":-/",        "😕" },
    { "8-o",        "😲" },
    { "%-)",        "🤪" },
    { ":_(",        "😢" },
    { ":-|",        "😐" },
    { ":facepalm:", "🤦" },
    { ":shrug:",    "🤷" },
    { ":eyeroll:",  "🙄" },
    { ":beer:",     "🍺" },
    { ":beers:",    "🍻" },
    { ":munch:",    "😱" },
    { NULL,         NULL }
};


d_char *not_really_markdown(char *content, d_char **f_content)
/* formats a content using some Markdown rules */
/*
 * Two passes are made over the text:
 *
 *  1. inline markup: `code`, **bold**, *italic* and bare http(s) URLs
 *     are wrapped in <code>, <b>, <i> and <a href="..."> respectively;
 *  2. line markup: a line starting with ``` toggles a <pre> block,
 *     lines starting with > are gathered into a <blockquote>, and every
 *     other emitted line is stripped and followed by <br>.
 *
 * Finally, the keys in smileys[] are replaced by their values.
 *
 * content:   the source text (not modified here).
 * f_content: receives the newly built string.
 *
 * Returns the same pointer stored in *f_content (presumably owned by
 * the caller from then on -- verify against the xs conventions).
 */
{
    d_char *s = NULL;
    int in_pre = 0;     /* inside a ``` ... ``` block */
    int in_blq = 0;     /* inside a run of > quoted lines */
    xs *list;
    char *p, *v;
    xs *wrk = xs_str_new(NULL);

    {
        /* split by special markup */
        /* the URL pattern is written as two adjacent literals so that
           the two slashes never appear together in the source text */
        xs *sm = xs_regex_split(content,
            "(`[^`]+`|\\*\\*?[^\\*]+\\*?\\*|https?:/" "/[^[:space:]]+)");
        int n = 0;

        p = sm;
        while (xs_list_iter(&p, &v)) {
            /* the loop relies on the split list alternating between
               plain text (even positions) and matches (odd positions) */
            if ((n & 0x1)) {
                /* markup */
                if (xs_startswith(v, "`")) {
                    /* drop the backticks */
                    xs *s1 = xs_crop(xs_dup(v), 1, -1);
                    xs *s2 = xs_fmt("<code>%s</code>", s1);
                    wrk = xs_str_cat(wrk, s2);
                }
                else
                if (xs_startswith(v, "**")) {
                    /* must be tested before the single-asterisk case */
                    xs *s1 = xs_crop(xs_dup(v), 2, -2);
                    xs *s2 = xs_fmt("<b>%s</b>", s1);
                    wrk = xs_str_cat(wrk, s2);
                }
                else
                if (xs_startswith(v, "*")) {
                    xs *s1 = xs_crop(xs_dup(v), 1, -1);
                    xs *s2 = xs_fmt("<i>%s</i>", s1);
                    wrk = xs_str_cat(wrk, s2);
                }
                else
                if (xs_startswith(v, "http")) {
                    /* the URL is both the link target and its text */
                    xs *s1 = xs_fmt("<a href=\"%s\">%s</a>", v, v);
                    wrk = xs_str_cat(wrk, s1);
                }
                else {
                    /* what the hell is this */
                    wrk = xs_str_cat(wrk, v);
                }
            }
            else {
                /* surrounded text, copy directly */
                wrk = xs_str_cat(wrk, v);
            }

            n++;
        }
    }

    /* now work by lines */
    p = list = xs_split(wrk, "\n");

    s = xs_str_new(NULL);

    while (xs_list_iter(&p, &v)) {
        xs *ss = xs_strip(xs_dup(v));

        if (xs_startswith(ss, "```")) {
            /* fence line: open or close the block; the line itself
               is not copied to the output */
            if (!in_pre) {
                s = xs_str_cat(s, "<pre>");
            }
            else {
                s = xs_str_cat(s, "</pre>");
            }

            in_pre = !in_pre;
            continue;
        }

        if (xs_startswith(ss, ">")) {
            /* delete the > and subsequent spaces */
            ss = xs_strip(xs_crop(ss, 1, 0));

            if (!in_blq) {
                s = xs_str_cat(s, "<blockquote>");
                in_blq = 1;
            }

            s = xs_str_cat(s, ss);
            s = xs_str_cat(s, "<br>");

            continue;
        }

        /* first non-quoted line after a quote: close it */
        if (in_blq) {
            s = xs_str_cat(s, "</blockquote>");
            in_blq = 0;
        }

        s = xs_str_cat(s, ss);
        s = xs_str_cat(s, "<br>");
    }

    /* close whatever was left open at the end of the text */
    if (in_blq) {
        s = xs_str_cat(s, "</blockquote>");
    }
    if (in_pre) {
        s = xs_str_cat(s, "</pre>");
    }

    /* some beauty fixes */
    s = xs_replace_i(s, "</blockquote><br>", "</blockquote>");

    {
        /* traditional emoticons */
        int n;

        for (n = 0; smileys[n].key; n++) {
            s = xs_replace_i(s, smileys[n].key, smileys[n].value);
        }
    }

    *f_content = s;

    return *f_content;
}


/* tag names that sanitize() lets through; "br/" is listed because the
   name extraction there does not remove a trailing slash, so <br/>
   yields "br/" */
const char *valid_tags[] = {
    "a", "p", "br", "br/", "img", "blockquote", "ul", "li",
    "span", "i", "b", "pre", "code", "em", "strong", NULL
};

d_char *sanitize(d_char *content)
/* cleans dangerous HTML output */
/*
 * Splits content into tag-looking pieces (<...>) and the text between
 * them. Text is copied as is. For each tag, the name is extracted
 * (lowercased, without the leading < or </ and the closing >) and
 * looked up in valid_tags[]: known tags are copied unchanged, unknown
 * ones get their < turned into &lt; so they are shown instead of
 * interpreted.
 *
 * Returns a newly built string; content is not modified here.
 *
 * NOTE(review): accepted tags are copied verbatim, attributes included,
 * so this does not filter attributes such as event handlers.
 */
{
    d_char *s = xs_str_new(NULL);
    xs *sl;
    int n = 0;
    char *p, *v;

    sl = xs_regex_split(content, "<[^>]+>");

    p = sl;

    while (xs_list_iter(&p, &v)) {
        /* odd positions hold the pieces that matched the tag regex */
        if (n & 0x1) {
            /* crop "<" (or "</") and ">", then keep only the first
               space-separated word: the tag name */
            xs *s1  = xs_strip(xs_crop(xs_dup(v), v[1] == '/' ? 2 : 1, -1));
            xs *l1  = xs_split_n(s1, " ", 1);
            xs *tag = xs_tolower(xs_dup(xs_list_get(l1, 0)));
            int i;

            /* check if it's one of the valid tags */
            for (i = 0; valid_tags[i]; i++) {
                if (strcmp(tag, valid_tags[i]) == 0) {
                    break;
                }
            }

            if (valid_tags[i]) {
                /* accepted tag */
                s = xs_str_cat(s, v);
            }
            else {
                /* bad tag */
                xs *s2 = xs_replace(v, "<", "&lt;");
                s = xs_str_cat(s, s2);
            }
        }
        else {
            /* non-tag */
            s = xs_str_cat(s, v);
        }

        n++;
    }

    return s;
}