From 3ab733cdf5a71b9a27399e8336e0c236c13d67fb Mon Sep 17 00:00:00 2001
From: default
Date: Wed, 8 May 2024 10:20:25 +0200
Subject: [PATCH] New function search_by_content().

---
Review notes (placed below the three-dash line, so they are not part of
the commit message):

 * data.c: search_by_content() walks `timeline`, loads each entry with
   timeline_get_by_md5() when `user` is non-NULL and with
   object_get_by_md5() otherwise, and appends the entry to the returned
   list when the object's "type" is "Note" and its "content" matches
   `regex`. Entries that fail to load, are not Notes, or have a null
   content are skipped.

 * Before matching, everything that looks like an HTML tag is replaced
   by a space, runs of two or more spaces are squeezed to one, and the
   text goes through xs_tolower_i(). The regex itself is used as given,
   so it presumably has to be written in lowercase to match anything
   containing letters -- TODO confirm against xs_tolower_i().

 * `timeout` is in seconds (it is added to time(NULL)). A value of 0 is
   replaced by 3. When the deadline passes the loop just stops and the
   matches found so far are returned; the caller cannot tell a
   truncated result from a complete one.

 * A negative `timeout` is not replaced by the default: the deadline is
   then already in the past and the loop ends on its first iteration.

 * Inside the loop, `xs *c` (the stripped content) shadows the `int c`
   list iterator declared above it. The while condition still uses the
   outer variable, so the iteration is unaffected, but the code will
   warn under -Wshadow.

 * main.c: the new `search` command runs the search over the "private"
   timeline with a 10 second timeout and prints one
   "{actor}/admin/p/{id}" line per match (the id is presumably the
   post's md5, as it is the value handed to timeline_get_by_md5()).

 * NOTE(review): line breaks and indentation in this copy of the patch
   were reconstructed (4-space indent assumed); spacing inside string
   literals is kept exactly as found. Context lines may therefore
   differ in whitespace from the real files -- verify against the
   repository before applying, or apply with whitespace tolerance.

 data.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 main.c | 18 ++++++++++++++++++
 snac.h |  3 +++
 3 files changed, 79 insertions(+)

diff --git a/data.c b/data.c
index 4e1682c..bc9f979 100644
--- a/data.c
+++ b/data.c
@@ -2488,6 +2488,64 @@ void notify_clear(snac *snac)
 }
 
 
+/** searches **/
+
+xs_list *search_by_content(snac *user, const xs_list *timeline,
+                           const char *regex, int timeout)
+/* returns a list of posts which content matches the regex */
+{
+    xs_list *r = xs_list_new();
+
+    if (timeout == 0)
+        timeout = 3;
+
+    int c = 0;
+    char *v;
+
+    time_t t = time(NULL) + timeout;
+
+    while (xs_list_next(timeline, &v, &c)) {
+        xs *post = NULL;
+
+        /* timeout? */
+        if (time(NULL) > t)
+            break;
+
+        int status;
+
+        if (user)
+            status = timeline_get_by_md5(user, v, &post);
+        else
+            status = object_get_by_md5(v, &post);
+
+        if (!valid_status(status))
+            continue;
+
+        /* must be a Note */
+        if (strcmp(xs_dict_get_def(post, "type", ""), "Note"))
+            continue;
+
+        char *content = xs_dict_get(post, "content");
+
+        if (xs_is_null(content))
+            continue;
+
+        /* strip HTML */
+        xs *c = xs_regex_replace(content, "<[^>]+>", " ");
+        c = xs_regex_replace_i(c, " {2,}", " ");
+        c = xs_tolower_i(c);
+
+        /* apply regex */
+        xs *l = xs_regex_select_n(c, regex, 1);
+
+        if (xs_list_len(l))
+            r = xs_list_append(r, v);
+    }
+
+    return r;
+}
+
+
 /** the queue **/
 
 static xs_dict *_enqueue_put(const char *fn, xs_dict *msg)
diff --git a/main.c b/main.c
index 6a38412..2e1a77c 100644
--- a/main.c
+++ b/main.c
@@ -44,6 +44,7 @@ int usage(void)
     printf("limit {basedir} {uid} {actor} Limits an actor (drops their announces)\n");
     printf("unlimit {basedir} {uid} {actor} Unlimits an actor\n");
     printf("verify_links {basedir} {uid} Verifies a user's links (in the metadata)\n");
+    printf("search {basedir} {uid} {regex} Searches posts by content\n");
 
     return 1;
 }
@@ -374,6 +375,23 @@ int main(int argc, char *argv[])
         return 0;
     }
 
+    if (strcmp(cmd, "search") == 0) { /** **/
+        xs *tl = timeline_simple_list(&snac, "private", 0, XS_ALL);
+
+        /* 'url' contains the regex */
+        xs *r = search_by_content(&snac, tl, url, 10);
+
+        int c = 0;
+        char *v;
+
+        /* print results as standalone links */
+        while (xs_list_next(r, &v, &c)) {
+            printf("%s/admin/p/%s\n", snac.actor, v);
+        }
+
+        return 0;
+    }
+
     if (strcmp(cmd, "ping") == 0) { /** **/
         xs *actor_o = NULL;
 
diff --git a/snac.h b/snac.h
index 2a988f1..ae8fc59 100644
--- a/snac.h
+++ b/snac.h
@@ -179,6 +179,9 @@ xs_list *list_timeline(snac *user, const char *list, int skip, int show);
 xs_val *list_content(snac *user, const char *list_id, const char *actor_md5, int op);
 void list_distribute(snac *user, const char *who, const xs_dict *post);
 
+xs_list *search_by_content(snac *user, const xs_list *timeline,
+                           const char *regex, int timeout);
+
 int actor_add(const char *actor, xs_dict *msg);
 int actor_get(const char *actor, xs_dict **data);
 int actor_get_refresh(snac *user, const char *actor, xs_dict **data);