Replaced encode_html_strict() with xs_html_encode().

This commit is contained in:
default 2023-11-20 18:33:24 +01:00
parent b68ed66669
commit bc5d0d4ed0
8 changed files with 273 additions and 26 deletions

View file

@ -37,9 +37,9 @@ activitypub.o: activitypub.c xs.h xs_json.h xs_curl.h xs_mime.h \
xs_openssl.h xs_regex.h xs_time.h xs_set.h xs_match.h snac.h
data.o: data.c xs.h xs_hex.h xs_io.h xs_json.h xs_openssl.h xs_glob.h \
xs_set.h xs_time.h snac.h
format.o: format.c xs.h xs_regex.h xs_mime.h snac.h
format.o: format.c xs.h xs_regex.h xs_mime.h xs_html.h snac.h
html.o: html.c xs.h xs_io.h xs_json.h xs_regex.h xs_set.h xs_openssl.h \
xs_time.h xs_mime.h xs_match.h snac.h
xs_time.h xs_mime.h xs_match.h xs_html.h snac.h
http.o: http.c xs.h xs_io.h xs_openssl.h xs_curl.h xs_time.h xs_json.h \
snac.h
httpd.o: httpd.c xs.h xs_io.h xs_json.h xs_socket.h xs_httpd.h xs_mime.h \
@ -50,7 +50,8 @@ mastoapi.o: mastoapi.c xs.h xs_hex.h xs_openssl.h xs_json.h xs_io.h \
snac.h
snac.o: snac.c xs.h xs_hex.h xs_io.h xs_unicode.h xs_json.h xs_curl.h \
xs_openssl.h xs_socket.h xs_url.h xs_httpd.h xs_mime.h xs_regex.h \
xs_set.h xs_time.h xs_glob.h xs_random.h xs_match.h xs_fcgi.h snac.h
xs_set.h xs_time.h xs_glob.h xs_random.h xs_match.h xs_fcgi.h xs_html.h \
snac.h
upgrade.o: upgrade.c xs.h xs_io.h xs_json.h xs_glob.h snac.h
utils.o: utils.c xs.h xs_io.h xs_json.h xs_time.h xs_openssl.h \
xs_random.h xs_glob.h snac.h

View file

@ -4,6 +4,7 @@
#include "xs.h"
#include "xs_regex.h"
#include "xs_mime.h"
#include "xs_html.h"
#include "snac.h"
@ -260,23 +261,10 @@ xs_str *sanitize(const char *content)
}
xs_str *encode_html_strict(const char *str)
/* escapes html characters; returns a newly built string, str is not modified */
{
    /* the ampersand has to be handled first: every entity added by the
       following replacements starts with '&' and would otherwise be
       escaped a second time */
    xs_str *encoded = xs_replace(str, "&", "&amp;");

    encoded = xs_replace_i(encoded, "<", "&lt;");
    encoded = xs_replace_i(encoded, ">", "&gt;");
    encoded = xs_replace_i(encoded, "\"", "&#34;");
    encoded = xs_replace_i(encoded, "'", "&#39;");

    return encoded;
}
xs_str *encode_html(const char *str)
/* escapes html characters */
{
xs_str *encoded = encode_html_strict(str);
xs_str *encoded = xs_html_encode((char *)str);
/* Restore only <br>. Probably safe. Let's hope nothing goes wrong with this. */
encoded = xs_replace_i(encoded, "&lt;br&gt;", "<br>");

11
html.c
View file

@ -10,6 +10,7 @@
#include "xs_time.h"
#include "xs_mime.h"
#include "xs_match.h"
#include "xs_html.h"
#include "snac.h"
@ -2137,10 +2138,10 @@ int html_get_handler(const xs_dict *req, const char *q_path,
xs *bio = not_really_markdown(xs_dict_get(snac.config, "bio"), NULL);
char *p, *v;
xs *es1 = encode_html_strict(xs_dict_get(snac.config, "name"));
xs *es2 = encode_html_strict(snac.uid);
xs *es3 = encode_html_strict(xs_dict_get(srv_config, "host"));
xs *es4 = encode_html_strict(bio);
xs *es1 = xs_html_encode(xs_dict_get(snac.config, "name"));
xs *es2 = xs_html_encode(snac.uid);
xs *es3 = xs_html_encode(xs_dict_get(srv_config, "host"));
xs *es4 = xs_html_encode(bio);
rss = xs_fmt(
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<rss version=\"0.91\">\n"
@ -2168,7 +2169,7 @@ int html_get_handler(const xs_dict *req, const char *q_path,
if (!xs_startswith(id, snac.actor))
continue;
xs *content = encode_html_strict(xs_dict_get(msg, "content"));
xs *content = xs_html_encode(xs_dict_get(msg, "content"));
// We SHOULD only use sanitized one for description.
// So, only encode for feed title, while the description just keep it sanitized as is.

1
snac.c
View file

@ -21,6 +21,7 @@
#include "xs_random.h"
#include "xs_match.h"
#include "xs_fcgi.h"
#include "xs_html.h"
#include "snac.h"

3
snac.h
View file

@ -1,7 +1,7 @@
/* snac - A simple, minimalistic ActivityPub instance */
/* copyright (c) 2022 - 2023 grunfink et al. / MIT license */
#define VERSION "2.43"
#define VERSION "2.44-dev"
#define USER_AGENT "snac/" VERSION
@ -266,7 +266,6 @@ int activitypub_post_handler(const xs_dict *req, const char *q_path,
xs_str *not_really_markdown(const char *content, xs_list **attach);
xs_str *sanitize(const char *content);
xs_str *encode_html_strict(const char *str);
xs_str *encode_html(const char *str);
xs_str *html_timeline(snac *user, const xs_list *list, int local,

19
xs.h
View file

@ -62,7 +62,8 @@ xs_str *xs_str_new(const char *str);
xs_str *xs_str_new_sz(const char *mem, int sz);
xs_str *xs_str_wrap_i(const char *prefix, xs_str *str, const char *suffix);
#define xs_str_prepend_i(str, prefix) xs_str_wrap_i(prefix, str, NULL)
#define xs_str_cat(str, suffix) xs_str_wrap_i(NULL, str, suffix)
xs_str *_xs_str_cat(xs_str *str, const char *strs[]);
#define xs_str_cat(str, ...) _xs_str_cat(str, (const char *[]){ __VA_ARGS__, NULL })
xs_str *xs_replace_in(xs_str *str, const char *sfrom, const char *sto, int times);
#define xs_replace_i(str, sfrom, sto) xs_replace_in(str, sfrom, sto, XS_ALL)
#define xs_replace(str, sfrom, sto) xs_replace_in(xs_dup(str), sfrom, sto, XS_ALL)
@ -451,6 +452,22 @@ xs_str *xs_str_wrap_i(const char *prefix, xs_str *str, const char *suffix)
}
xs_str *_xs_str_cat(xs_str *str, const char *strs[])
/* appends, in order, every string of the NULL-terminated array strs to str */
{
    const char **cur;
    int offset = strlen(str);   /* insertion point: current end of str */

    for (cur = strs; *cur != NULL; cur++) {
        int len = strlen(*cur);

        /* xs_insert_m() returns the string pointer to keep using */
        str = xs_insert_m(str, offset, *cur, len);
        offset += len;
    }

    return str;
}
xs_str *xs_replace_in(xs_str *str, const char *sfrom, const char *sto, int times)
/* replaces inline all sfrom with sto */
{

240
xs_html.h Normal file
View file

@ -0,0 +1,240 @@
/* copyright (c) 2022 - 2023 grunfink et al. / MIT license */
#ifndef _XS_HTML_H
#define _XS_HTML_H
/* opaque node of an HTML tree (a tag, an attribute or a piece of text) */
typedef struct xs_html xs_html;

/* returns a new string with the characters < > " ' & of str replaced
   by decimal character references (&#NN;) */
xs_str *xs_html_encode(char *str);

/* creates an attribute node: key="value" with value HTML-encoded,
   or just key if value is NULL; key is used as is */
xs_html *xs_html_attr(char *key, char *value);

/* creates a text node whose content is HTML-encoded */
xs_html *xs_html_text(char *content);

/* creates a text node whose content is copied without encoding */
xs_html *xs_html_raw(char *content);

/* appends data to tag (attributes and child nodes are kept in
   separate lists); returns tag */
xs_html *xs_html_add(xs_html *tag, xs_html *data);

/* creates a tag node from a NULL-terminated array of nodes; normally
   called through xs_html_tag(), which builds the array and appends the
   NULL (the macro needs at least one node argument) */
xs_html *_xs_html_tag(char *tag, xs_html *var[]);
#define xs_html_tag(tag, ...) _xs_html_tag(tag, (xs_html *[]) { __VA_ARGS__, NULL })

/* same as above, for self-closing tags (rendered as <tag ... />) */
xs_html *_xs_html_sctag(char *tag, xs_html *var[]);
#define xs_html_sctag(tag, ...) _xs_html_sctag(tag, (xs_html *[]) { __VA_ARGS__, NULL })

/* appends the rendering of h to s and returns the resulting string;
   every node that gets rendered is freed, so h must not be used
   afterwards. xs_html_render() starts from a new string */
xs_str *_xs_html_render(xs_html *h, xs_str *s);
#define xs_html_render(h) _xs_html_render(h, xs_str_new(NULL))
#ifdef XS_IMPLEMENTATION
/* kind of node stored in an xs_html */
typedef enum {
XS_HTML_TAG,    /* regular tag: rendered as <name attrs>children</name> */
XS_HTML_SCTAG,  /* self-closing tag: rendered as <name attrs/> */
XS_HTML_ATTR,   /* attribute: rendered as a space followed by its content */
XS_HTML_TEXT    /* text (encoded or raw): content is rendered verbatim */
} xs_html_type;
/* a node of the HTML tree; attributes and children of a tag are kept
   in two separate singly-linked lists chained through 'next' */
struct xs_html {
xs_html_type type;  /* kind of node */
xs_str *content;    /* tag name, attribute text (key or key="value") or text */
xs_html *f_attr;    /* first attribute of this tag */
xs_html *l_attr;    /* last attribute of this tag (where new ones are linked) */
xs_html *f_tag;     /* first child (any non-attribute node) of this tag */
xs_html *l_tag;     /* last child of this tag (where new ones are linked) */
xs_html *next;      /* next node in the list this node was added to */
};
xs_str *xs_html_encode(char *str)
/* encodes str using HTML entities: each of < > " ' & is replaced by its
   decimal character reference (&#NN;), everything else is copied as is.
   str must be a non-NULL, NUL-terminated string; a new string is returned */
{
    xs_str *s = xs_str_new(NULL);
    int o = 0;      /* write offset into s */

    for (;;) {
        /* length of the leading run that needs no escaping; a single
           strcspn() pass replaces one memchr() scan per special character */
        int z = (int) strcspn(str, "<>\"'&");

        /* copy that run */
        s = xs_insert_m(s, o, str, z);
        o += z;
        str += z;

        /* if the end of the string was reached, nothing more to do */
        if (*str == '\0')
            break;

        /* insert the escaped char; all five characters have codes
           below 100, so "&#NN;" always fits in tmp */
        char tmp[8];

        snprintf(tmp, sizeof(tmp), "&#%d;", *str);
        z = strlen(tmp);
        s = xs_insert_m(s, o, tmp, z);
        o += z;

        /* continue after the escaped char */
        str++;
    }

    return s;
}
/* allocates a new xs_html node with xs_realloc() and fills it with zeros,
   so all its pointers start as NULL; the xs_realloc() result is not checked here */
#define XS_HTML_NEW() memset(xs_realloc(NULL, sizeof(xs_html)), '\0', sizeof(xs_html))
xs_html *xs_html_attr(char *key, char *value)
/* builds an attribute node: key="value" (value HTML-encoded), or the
   bare key when value is NULL. key is stored without any encoding */
{
    xs_html *attr = XS_HTML_NEW();

    attr->type = XS_HTML_ATTR;

    /* valueless attribute: just a copy of the key */
    if (value == NULL) {
        attr->content = xs_dup(key);
        return attr;
    }

    /* only the value is escaped */
    xs *escaped = xs_html_encode(value);

    attr->content = xs_fmt("%s=\"%s\"", key, escaped);

    return attr;
}
xs_html *xs_html_text(char *content)
/* builds a text node holding the HTML-encoded version of content */
{
    xs_html *node = XS_HTML_NEW();

    node->content = xs_html_encode(content);
    node->type    = XS_HTML_TEXT;

    return node;
}
xs_html *xs_html_raw(char *content)
/* builds a text node holding a verbatim copy of content; nothing is
   escaped (meant for pre-formatted HTML, comments, etc) */
{
    xs_html *node = XS_HTML_NEW();

    node->content = xs_dup(content);
    node->type    = XS_HTML_TEXT;

    return node;
}
xs_html *xs_html_add(xs_html *tag, xs_html *data)
/* appends data to tag: attributes go to the attribute list, anything
   else (tags or text) to the child list. Returns tag */
{
    int is_attr = (data->type == XS_HTML_ATTR);

    /* pick the head and tail pointers of the list data belongs to */
    xs_html **head = is_attr ? &tag->f_attr : &tag->f_tag;
    xs_html **tail = is_attr ? &tag->l_attr : &tag->l_tag;

    /* first node of the list */
    if (*head == NULL)
        *head = data;

    /* chain after the current last node, if any */
    if (*tail != NULL)
        (*tail)->next = data;

    /* data is the new last node */
    *tail = data;

    return tag;
}
static xs_html *_xs_html_tag_t(xs_html_type type, char *tag, xs_html *var[])
/* builds a tag node of the given type named tag and adds to it every
   node (attribute, subtag or text) of the NULL-terminated array var */
{
    xs_html *node = XS_HTML_NEW();
    xs_html **item;

    node->type    = type;
    node->content = xs_dup(tag);

    for (item = var; *item != NULL; item++)
        xs_html_add(node, *item);

    return node;
}
xs_html *_xs_html_tag(char *tag, xs_html *var[])
/* builds a regular tag node from a NULL-terminated array of nodes */
{
    xs_html *node = _xs_html_tag_t(XS_HTML_TAG, tag, var);

    return node;
}
xs_html *_xs_html_sctag(char *tag, xs_html *var[])
/* builds a self-closing tag node from a NULL-terminated array of nodes */
{
    xs_html *node = _xs_html_tag_t(XS_HTML_SCTAG, tag, var);

    return node;
}
xs_str *_xs_html_render(xs_html *h, xs_str *s)
/* appends the rendering of node h (and, for tags, of its attributes and
   children) to s, frees h and returns the resulting string */
{
    xs_html *node;
    xs_html *following;

    if (h->type == XS_HTML_ATTR) {
        /* attributes are separated by a leading space */
        s = xs_str_cat(s, " ", h->content);
    }
    else
    if (h->type == XS_HTML_TEXT) {
        /* text was already encoded (or left raw) at creation time */
        s = xs_str_cat(s, h->content);
    }
    else
    if (h->type == XS_HTML_TAG || h->type == XS_HTML_SCTAG) {
        s = xs_str_cat(s, "<", h->content);

        /* attributes first; the next pointer is read before rendering
           because rendering a node frees it */
        for (node = h->f_attr; node != NULL; node = following) {
            following = node->next;
            s = _xs_html_render(node, s);
        }

        if (h->type == XS_HTML_SCTAG) {
            /* self-closing tags should not have subtags:
               the child list is not visited here */
            s = xs_str_cat(s, "/>");
        }
        else {
            s = xs_str_cat(s, ">");

            /* children, in insertion order */
            for (node = h->f_tag; node != NULL; node = following) {
                following = node->next;
                s = _xs_html_render(node, s);
            }

            s = xs_str_cat(s, "</", h->content, ">");
        }
    }

    /* the node is consumed by the rendering */
    xs_free(h->content);
    xs_free(h);

    return s;
}
#endif /* XS_IMPLEMENTATION */
#endif /* _XS_HTML_H */

View file

@ -1 +1 @@
/* 416f5ffa99ecd4a3ec25d273b986d3d99dc92d22 */
/* 63beb583926bb5dfec89e1d694172cc887614460 2023-11-19T19:51:05+01:00 */