From c3b50574acaad6d2dc8128ed66203f9e11c431df Mon Sep 17 00:00:00 2001
From: default
Date: Thu, 15 Feb 2024 19:24:10 +0100
Subject: [PATCH] New function verify_links().

---
 main.c  |   5 +++
 snac.h  |   2 +
 utils.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+)

diff --git a/main.c b/main.c
index 08f23d5..e9b6cfe 100644
--- a/main.c
+++ b/main.c
@@ -247,6 +247,11 @@ int main(int argc, char *argv[])
         return 0;
     }
 
+    if (strcmp(cmd, "verify_links") == 0) { /** **/
+        verify_links(&snac);
+        return 0;
+    }
+
     if (strcmp(cmd, "timeline") == 0) { /** **/
 #if 0
         xs *list = local_list(&snac, XS_ALL);
diff --git a/snac.h b/snac.h
index 7c3092b..51d223f 100644
--- a/snac.h
+++ b/snac.h
@@ -330,3 +330,5 @@ int mastoapi_put_handler(const xs_dict *req, const char *q_path,
                          const char *payload, int p_size,
                          char **body, int *b_size, char **ctype);
 void mastoapi_purge(void);
+
+void verify_links(snac *user);
diff --git a/utils.c b/utils.c
index 58d34c6..9690768 100644
--- a/utils.c
+++ b/utils.c
@@ -8,6 +8,8 @@
 #include "xs_openssl.h"
 #include "xs_random.h"
 #include "xs_glob.h"
+#include "xs_curl.h"
+#include "xs_regex.h"
 
 #include "snac.h"
 
@@ -407,3 +409,120 @@ int deluser(snac *user)
 
     return ret;
 }
+
+
+void verify_links(snac *user)
+/* verifies a user's links: every https URL found in the "metadata"
+   config entry is fetched and scanned for <a> / <link> tags; a tag
+   with rel="me" and an href equal to user->actor marks that URL as
+   verified. Verified URLs are stored in user->links together with a
+   timestamp and, if there was any, user->links is dumped to links.json */
+{
+    xs_dict *p = xs_dict_get(user->config, "metadata");
+    char *k, *v;
+    int changed = 0;
+
+    while (p && xs_dict_iter(&p, &k, &v)) {
+        /* not an https link? skip */
+        if (!xs_startswith(v, "https:/" "/"))
+            continue;
+
+        int status;
+        xs *req = NULL;
+        xs *payload = NULL;
+        int p_size = 0;
+
+        req = xs_http_request("GET", v, NULL, NULL, 0, &status,
+            &payload, &p_size, 0);
+
+        /* payload is still NULL if the request did not fill it in;
+           without a body there is nothing to scan */
+        if (!valid_status(status) || payload == NULL)
+            continue;
+
+        /* extract the links */
+        xs *ls = xs_regex_select(payload, "< *(a|link) +[^>]+>");
+
+        xs_list *lp = ls;
+        char *ll;
+
+        while (xs_list_iter(&lp, &ll)) {
+            /* extract href and rel */
+            xs *r = xs_regex_select(ll, "(href|rel) *= *(\"[^\"]*\"|'[^']*')");
+
+            /* must have both attributes */
+            if (xs_list_len(r) != 2)
+                continue;
+
+            xs *href = NULL;
+            int is_rel_me = 0;
+            xs_list *pr = r;
+            char *ar;
+
+            while (xs_list_iter(&pr, &ar)) {
+                xs *nq = xs_dup(ar);
+
+                nq = xs_replace_i(nq, "\"", "");
+                nq = xs_replace_i(nq, "'", "");
+
+                xs *r2 = xs_split_n(nq, "=", 1);
+                if (xs_list_len(r2) != 2)
+                    continue;
+
+                xs *ak = xs_strip_i(xs_dup(xs_list_get(r2, 0)));
+                xs *av = xs_strip_i(xs_dup(xs_list_get(r2, 1)));
+
+                if (strcmp(ak, "href") == 0) {
+                    /* keep the first href only: assigning again would
+                       overwrite the pointer to the previous copy (and
+                       two hrefs mean no rel, so no match either way) */
+                    if (href == NULL)
+                        href = xs_dup(av);
+                }
+                else
+                if (strcmp(ak, "rel") == 0) {
+                    /* split the value by spaces */
+                    xs *vbs = xs_split(av, " ");
+
+                    /* is any of it "me"? */
+                    if (xs_list_in(vbs, "me") != -1)
+                        is_rel_me = 1;
+                }
+            }
+
+            /* after all this acrobatics, do we have a rel="me" whose
+               href is the same as the actor? */
+            if (href != NULL && is_rel_me && strcmp(href, user->actor) == 0) {
+                /* got it! */
+                xs *verified_at = xs_str_utctime(0, ISO_DATE_SPEC);
+
+                user->links = xs_dict_set(user->links, v, verified_at);
+
+                changed++;
+
+                snac_log(user, xs_fmt("verify_links: %s at %s", v, verified_at));
+
+                /* one matching tag is enough for this URL */
+                break;
+            }
+        }
+    }
+
+    if (changed) {
+        FILE *f;
+
+        /* update the links.json file; the previous one is moved to
+           .bak and moved back if the new file cannot be opened */
+        xs *fn = xs_fmt("%s/links.json", user->basedir);
+        xs *bfn = xs_fmt("%s.bak", fn);
+
+        rename(fn, bfn);
+
+        if ((f = fopen(fn, "w")) != NULL) {
+            xs_json_dump(user->links, 4, f);
+            fclose(f);
+        }
+        else
+            rename(bfn, fn);
+    }
+}