[indexer] Use ICU regex to handle unicode characters in mastodon/bluesky domains

Improve the regex (ICU is required for Unicode character support). Also add tests for umlauts, add the generated "Testing" folder to .gitignore, and clean up url::UrlEncode a bit.
TODO: the Android build is currently broken.
Signed-off-by: Harry Bond <me@hbond.xyz>
This commit is contained in:
Harry Bond
2025-07-26 17:12:37 +01:00
parent 8174eac134
commit 34e9b17c33
5 changed files with 65 additions and 35 deletions

View File

@@ -80,6 +80,7 @@ UNIT_TEST(Url_Encode)
TEST_EQUAL(UrlEncode("%% "), "%25%25%20", ());
TEST_EQUAL(UrlEncode("20"), "20", ());
TEST_EQUAL(UrlEncode("Guinea-Bissau"), "Guinea-Bissau", ());
TEST_EQUAL(UrlEncode("ümlaut"), "%C3%BCmlaut", ());
TEST_EQUAL(UrlEncode(orig1), enc1, ());
TEST_EQUAL(UrlEncode(orig2), enc2, ());
TEST_EQUAL(UrlEncode(orig3), enc3, ());
@@ -98,6 +99,8 @@ UNIT_TEST(Url_Decode)
TEST_EQUAL(UrlDecode(enc3), orig3, ());
TEST_EQUAL(UrlDecode(enc4), orig4, ());
TEST_EQUAL(UrlDecode("123+Main+St,+Seattle,+WA+98101"), "123 Main St, Seattle, WA 98101", ());
TEST_EQUAL(UrlDecode("%C3%BCmlaut"), "ümlaut", ());
}
UNIT_TEST(Url_Invalid)
@@ -127,6 +130,11 @@ UNIT_TEST(Url_Valid)
.Host("www.sandwichparlour.com.au")
.Path("");
TestUrl("https://www.ümlaut.org.de/")
.Scheme("https")
.Host("www.ümlaut.org.de")
.Path("");
TestUrl("cm:/&test").Scheme("cm").Host("&test").Path("");
}