[search] Add multi-token synonyms

Signed-off-by: x7z4w <x7z4w@noreply.codeberg.org>
Co-authored-by: patepelo <developing.anton@gmail.com>
This commit is contained in:
x7z4w
2025-10-23 13:13:56 +02:00
parent 6dcf4b039e
commit 042d497a5e
2 changed files with 90 additions and 95 deletions

View File

@@ -32,11 +32,97 @@ std::vector<UniString> const kAllowedMisprints = {
MakeUniString("fh"), // "Hernández" <-> "Fernández"
};
// Table of (abbreviation, expansion) pairs. Each key is a short written form that
// contains punctuation and/or spaces (dots, hyphens, slashes, apostrophes), and
// each value is the spelled-out phrase, which may consist of several words.
// Keys cover Latin, Greek and Cyrillic scripts.
//
// Presumably applied to the text before it is split into tokens, since the keys
// contain characters that look like token separators -- confirm at the use site.
//
// NOTE(review): if the consumer walks this array in order and replaces matches as
// it goes, a key that is a prefix of a later key can shadow it. Two candidates
// are flagged inline below ("p.º" / "p.º mar." and "ι" / "ι.ν"); verify against
// the consumer before reordering.
static std::pair<UniString, UniString> const kPreprocessReplacements[] = {
    {MakeUniString("a. d."), MakeUniString("an den")},
    {MakeUniString("arm. gen"), MakeUniString("armádneho generála")},
    {MakeUniString("atr'"), MakeUniString("ambohitr'")},
    {MakeUniString("a. v."), MakeUniString("asociación vecinal")},
    {MakeUniString("a. vv."), MakeUniString("asociación de vecinos")},
    {MakeUniString("b-dul"), MakeUniString("bulevardul")},
    {MakeUniString("b.dul"), MakeUniString("bulevardul")},
    {MakeUniString("b.º"), MakeUniString("barrio")},
    {MakeUniString("b.v."), MakeUniString("bombeiros voluntários")},
    {MakeUniString("c.c."), MakeUniString("centro comercial")},
    {MakeUniString("c. e. b."), MakeUniString("ciclo do ensino básico")},
    {MakeUniString("c.e.b."), MakeUniString("ciclo do ensino básico")},
    {MakeUniString("c. h"), MakeUniString("camino hondo")},
    {MakeUniString("c.le"), MakeUniString("calle")},
    {MakeUniString("c. m."), MakeUniString("câmara municipal")},
    {MakeUniString("c.m."), MakeUniString("câmara municipal")},
    {MakeUniString("c.na"), MakeUniString("cascina")},
    {MakeUniString("c. n."), MakeUniString("camino nuevo")},
    {MakeUniString("c.po"), MakeUniString("campo")},
    {MakeUniString("c.so"), MakeUniString("corso")},
    {MakeUniString("c.te"), MakeUniString("corte")},
    {MakeUniString("c. v."), MakeUniString("camino viejo")},
    {MakeUniString("d'"), MakeUniString("de")},
    {MakeUniString("e. b."), MakeUniString("escola básica")},
    {MakeUniString("e.b."), MakeUniString("escola básica")},
    {MakeUniString("e.p."), MakeUniString("empresa pública")},
    {MakeUniString("e.t.a.r."), MakeUniString("estação de tratamento de águas residuais")},
    // The spaced and unspaced spellings below intentionally differ in this table:
    // "f. c." expands to "ferrocarril", "f.c." to "futebol clube".
    {MakeUniString("f. c."), MakeUniString("ferrocarril")},
    {MakeUniString("f.c."), MakeUniString("futebol clube")},
    {MakeUniString("ff. cc."), MakeUniString("ferrocarrís")},
    {MakeUniString("f.ta"), MakeUniString("fondamenta")},
    {MakeUniString("g.n.r."), MakeUniString("guarda nacional republicana")},
    {MakeUniString("g. v."), MakeUniString("gran vía")},
    {MakeUniString("i.d."), MakeUniString("in der")},
    {MakeUniString("k/s"), MakeUniString("khách sạn")},
    {MakeUniString("l.go"), MakeUniString("largo")},
    {MakeUniString("m-te"), MakeUniString("muntele")},
    {MakeUniString("n.ª s.ª"), MakeUniString("nuestra señora")},
    {MakeUniString("nat'l"), MakeUniString("national")},
    {MakeUniString("n z"), MakeUniString("noordzijde")},
    {MakeUniString("n. z"), MakeUniString("noordzijde")},
    {MakeUniString("o.l.v"), MakeUniString("onze-lieve-vrouw")},
    {MakeUniString("o z"), MakeUniString("oostzijde")},
    {MakeUniString("o. z"), MakeUniString("oostzijde")},
    {MakeUniString("pg. ind."), MakeUniString("polígono industrial")},
    {MakeUniString("p.j."), MakeUniString("polícia judiciária")},
    {MakeUniString("p. k."), MakeUniString("punto kilométrico")},
    {MakeUniString("p.le"), MakeUniString("piazzale")},
    // NOTE(review): "p.º" is a prefix of the next key "p.º mar."; if entries are
    // applied in array order the longer key may never match -- confirm.
    {MakeUniString("p.º"), MakeUniString("paseo")},
    {MakeUniString("p.º mar."), MakeUniString("paseo marítimo")},
    {MakeUniString("p.s.p."), MakeUniString("polícia de segurança pública")},
    // Three spellings of the same abbreviation: plain "t", "ţ" (t with cedilla)
    // and "ț" (t with comma below).
    {MakeUniString("p-ta"), MakeUniString("piața")},
    {MakeUniString("p-ţa"), MakeUniString("piața")},
    {MakeUniString("p-ța"), MakeUniString("piața")},
    {MakeUniString("p.ta"), MakeUniString("porta")},
    {MakeUniString("p.te"), MakeUniString("ponte")},
    {MakeUniString("p.za"), MakeUniString("piazza")},
    {MakeUniString("p.zza"), MakeUniString("piazza")},
    {MakeUniString("r/c"), MakeUniString("rés-do-chão")},
    {MakeUniString("s.a."), MakeUniString("sociedade anónima")},
    {MakeUniString("s/àt"), MakeUniString("sobreàtic")},
    {MakeUniString("s. c."), MakeUniString("sport clube")},
    {MakeUniString("s.c."), MakeUniString("sport clube")},
    {MakeUniString("s.da"), MakeUniString("salizada")},
    {MakeUniString("s.ra"), MakeUniString("senhora")},
    {MakeUniString("sr.ª"), MakeUniString("senhora")},
    {MakeUniString("ss.ma"), MakeUniString("santissima")},
    {MakeUniString("ss.me"), MakeUniString("santissime")},
    {MakeUniString("ss.mi"), MakeUniString("santissimi")},
    {MakeUniString("ss.mo"), MakeUniString("santissimo")},
    {MakeUniString("str-la"), MakeUniString("stradela")},
    {MakeUniString("v.co"), MakeUniString("vico")},
    // "v. d." and "v.d" are distinct keys with distinct expansions in this table.
    {MakeUniString("v. d."), MakeUniString("van de")},
    {MakeUniString("v.d"), MakeUniString("von der")},
    {MakeUniString("v.lo"), MakeUniString("vicolo")},
    {MakeUniString("w z"), MakeUniString("westzijde")},
    {MakeUniString("w. z"), MakeUniString("westzijde")},
    {MakeUniString("z z"), MakeUniString("zuidzijde")},
    {MakeUniString("z. z"), MakeUniString("zuidzijde")},
    {MakeUniString("δημ. σχ"), MakeUniString("δημοτικό σχολείο")},
    {MakeUniString("θεσ/νίκης"), MakeUniString("θεσσαλονίκης")},
    // NOTE(review): the key here is the single letter "ι", unlike every other
    // entry, and it is also a prefix of the next key "ι.ν". Looks like it may
    // have been meant as a two-letter abbreviation -- confirm with the author.
    {MakeUniString("ι"), MakeUniString("ιερά μονή")},
    {MakeUniString("ι.ν"), MakeUniString("ιερός ναός")},
    {MakeUniString("κων/νου"), MakeUniString("κωνσταντίνου")},
    {MakeUniString("д-р"), MakeUniString("доктор")},
    {MakeUniString("ж.к"), MakeUniString("жилищен комплекс")},
    {MakeUniString("м-н"), MakeUniString("микрорайон")},
    {MakeUniString("наб-я"), MakeUniString("набережная")},
    {MakeUniString("пр-д"), MakeUniString("проезд")},
    {MakeUniString("пр-т"), MakeUniString("проспект")},
};
void TransliterateHiraganaToKatakana(UniString & s)