summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author default <nobody@localhost> 2022-10-28 18:06:42 +0200
committer default <nobody@localhost> 2022-10-28 18:06:42 +0200
commit 5ee9504b1801f106f152ef9d60620ad88454d390 (patch)
tree 8974400798b1a30c2b07cdebb6f6b8d4c7d8ccc1
parent 2b590cd90ddbce6b23e2ea35a8b624c90991ceb2 (diff)
Be more aggressive in HTML sanitization.
-rw-r--r-- format.c 51
-rw-r--r-- html.c 3
-rw-r--r-- snac.h 2
3 files changed, 55 insertions, 1 deletions
diff --git a/format.c b/format.c
index 69efbdb..1c2a97a 100644
--- a/format.c
+++ b/format.c
@@ -150,3 +150,54 @@ d_char *not_really_markdown(char *content, d_char **f_content)
return *f_content;
}
+
+
+const char *valid_tags[] = { /* tag names that sanitize() accepts; the NULL entry ends the list */
+ "a", "p", "br", "img", "blockquote", "ul", "li", "span", NULL
+};
+
+d_char *sanitize(d_char *content)
+/* builds and returns a string from content in which every tag whose name is not listed in valid_tags has "<" replaced by "&lt;" */
+{
+ d_char *s = xs_str_new(NULL); /* output accumulator; this is the value returned */
+ xs *sl;
+ int n = 0; /* index of the current piece; its parity selects tag vs. non-tag handling */
+ char *p, *v;
+
+ sl = xs_regex_split(content, "</?[^>]+>"); /* presumably yields non-tag text and tag-like matches alternately -- confirm against xs_regex_split */
+
+ p = sl;
+
+ while (xs_list_iter(&p, &v)) {
+ if (n & 0x1) { /* odd-indexed pieces are handled as tags */
+ xs *s1 = xs_strip(xs_crop(xs_dup(v), v[1] == '/' ? 2 : 1, -1)); /* presumably drops the leading "<" or "</" and the trailing ">" -- confirm xs_crop semantics */
+ xs *l1 = xs_split_n(s1, " ", 1); /* split at the first space only: element 0 is used as the tag name */
+ xs *tag = xs_tolower(xs_dup(xs_list_get(l1, 0))); /* lowercased copy, compared below against the lowercase names in valid_tags */
+ int i;
+
+ /* look the tag name up in valid_tags; i stops at the matching entry or at the NULL terminator */
+ for (i = 0; valid_tags[i]; i++) {
+ if (strcmp(tag, valid_tags[i]) == 0)
+ break;
+ }
+
+ if (valid_tags[i]) {
+ /* accepted tag: the original tag text v is appended as-is; NOTE(review): that includes whatever attributes it carries -- confirm this is intended */
+ s = xs_str_cat(s, v);
+ }
+ else {
+ /* rejected tag: appended with "<" replaced by "&lt;" */
+ xs *s2 = xs_replace(v, "<", "&lt;");
+ s = xs_str_cat(s, s2);
+ }
+ }
+ else {
+ /* even-indexed piece (non-tag text): appended unchanged */
+ s = xs_str_cat(s, v);
+ }
+
+ n++;
+ }
+
+ return s;
+}
diff --git a/html.c b/html.c
index 7fd1294..43638d5 100644
--- a/html.c
+++ b/html.c
@@ -600,8 +600,9 @@ d_char *html_entry(snac *snac, d_char *os, char *msg, xs_set *seen, int local, i
}
}
+ xs *sc = sanitize(c);
- s = xs_str_cat(s, c);
+ s = xs_str_cat(s, sc);
}
s = xs_str_cat(s, "\n");
diff --git a/snac.h b/snac.h
index f11dcc4..7a3a5f3 100644
--- a/snac.h
+++ b/snac.h
@@ -134,6 +134,8 @@ int activitypub_post_handler(d_char *req, char *q_path,
char **body, int *b_size, char **ctype);
d_char *not_really_markdown(char *content, d_char **f_content);
+d_char *sanitize(d_char *str);
+
int html_get_handler(d_char *req, char *q_path, char **body, int *b_size, char **ctype);
int html_post_handler(d_char *req, char *q_path, d_char *payload, int p_size,
char **body, int *b_size, char **ctype);