Be more aggressive in HTML sanitization.

This commit is contained in:
default 2022-10-28 18:06:42 +02:00
parent 2b590cd90d
commit 5ee9504b18
3 changed files with 55 additions and 1 deletions

View file

@ -150,3 +150,54 @@ d_char *not_really_markdown(char *content, d_char **f_content)
return *f_content;
}
/* HTML element names that sanitize() copies through unescaped.
   The list is NULL-terminated so it can be walked without a length. */
const char *valid_tags[] = {
    "a",
    "p",
    "br",
    "img",
    "blockquote",
    "ul",
    "li",
    "span",
    NULL
};
d_char *sanitize(d_char *content)
/* cleans dangerous HTML output.
   content is split on the regex "</?[^>]+>" and the pieces are processed
   in order: complete tags whose name is in valid_tags are copied as they
   are, every other '<' (rejected tags and stray / unterminated ones in the
   text between tags) is turned into "&lt;".
   Returns a newly built string; content itself is not modified. */
{
    d_char *s = xs_str_new(NULL);
    xs *sl = xs_regex_split(content, "</?[^>]+>");
    char *p = sl;
    char *v;
    int n = 0;

    while (xs_list_iter(&p, &v)) {
        if (n & 0x1) {
            /* odd positions are handled as the regex matches, i.e. complete
               "<...>" tags; a match has at least 3 characters, so v[1] is
               always readable */
            xs *s1  = xs_strip(xs_crop(xs_dup(v), v[1] == '/' ? 2 : 1, -1));
            xs *l1  = xs_split_n(s1, " ", 1);
            xs *tag = xs_tolower(xs_dup(xs_list_get(l1, 0)));
            int i;

            /* check if it's one of the valid tags */
            for (i = 0; valid_tags[i]; i++) {
                if (strcmp(tag, valid_tags[i]) == 0)
                    break;
            }

            if (valid_tags[i]) {
                /* accepted tag */
                /* NOTE(review): the tag is copied with all of its
                   attributes, so things like onclick= or javascript: URLs
                   still get through; attribute filtering is not done here */
                s = xs_str_cat(s, v);
            }
            else {
                /* bad tag */
                xs *s2 = xs_replace(v, "<", "&lt;");

                s = xs_str_cat(s, s2);
            }
        }
        else {
            /* non-tag: the regex only matches tags that have a closing '>',
               so an unterminated one (e.g. a trailing "<script src=...")
               ends up here; escape every '<' so the markup that follows
               this content in the page cannot complete it */
            xs *s2 = xs_replace(v, "<", "&lt;");

            s = xs_str_cat(s, s2);
        }

        n++;
    }

    return s;
}

3
html.c
View file

@ -600,8 +600,9 @@ d_char *html_entry(snac *snac, d_char *os, char *msg, xs_set *seen, int local, i
}
}
xs *sc = sanitize(c);
s = xs_str_cat(s, c);
s = xs_str_cat(s, sc);
}
s = xs_str_cat(s, "\n");

2
snac.h
View file

@ -134,6 +134,8 @@ int activitypub_post_handler(d_char *req, char *q_path,
char **body, int *b_size, char **ctype);
/* the visible tail of the definition returns *f_content, i.e. the result
   is also handed back through the f_content out-parameter */
d_char *not_really_markdown(char *content, d_char **f_content);
/* cleans dangerous HTML output: returns a new string in which tags that
   are not in the allowed list have their '<' replaced by "&lt;" */
d_char *sanitize(d_char *str);
/* NOTE(review): the handler definitions are not visible here; presumably
   they return a status code and fill *body, *b_size and *ctype with the
   response -- confirm against html.c */
int html_get_handler(d_char *req, char *q_path, char **body, int *b_size, char **ctype);
int html_post_handler(d_char *req, char *q_path, d_char *payload, int p_size,
char **body, int *b_size, char **ctype);