Files
comaps/tools/ruby/category_consistency/omim_parsers.rb
Konstantin Pastbin e3e4a1985a Organic Maps sources as of 02.04.2025 (fad26bbf22ac3da75e01e62aa01e5c8e11861005)
To expand with full Organic Maps and Maps.ME commits history run:
  git remote add om-historic [om-historic.git repo url]
  git fetch --tags om-historic
  git replace squashed-history historic-commits
2025-05-08 21:10:51 +07:00

105 lines
2.5 KiB
Ruby

module OmimParsers
# Language codes accepted by the parsers in this module; entries for any
# other language code are ignored.
#
# To regenerate the list, run this from the repository root:
# sed -nEe "s/ +([a-zA-Z]{2}(-[a-zA-Z]{2,})?) = .*$/\1/p" "data/strings/strings.txt" | sort -u | tr '\n' ' ' | sed -e 's/,$//' | fold -s -w48; echo
#
# Frozen so the shared whitelist cannot be modified by accident at runtime.
LANGUAGES = %w[
  af ar be bg ca cs da de el en en-GB es es-MX et
  eu fa fi fr fr-CA he hi hu id it ja ko lt mr nb
  nl pl pt pt-BR ro ru sk sv sw th tr uk vi
  zh-Hans zh-Hant
].freeze
# Base class for line-oriented translation file parsers.
#
# A file is read as a sequence of blocks separated by blank lines. A block
# is collected once one of its lines matches the subclass' #category pattern
# and the captured name is listed in +keys+. Subclasses must implement
# #parse_line and #category and may override #should_exclude_line?.
class AbstractParser
  # @param keys [#include?] category names whose blocks should be collected
  def initialize(keys)
    @keys = keys
  end

  # Extracts a translation entry from a single, already stripped line.
  #
  # @param line [String]
  # @return [Array, nil] a two-element [lang, translation] array,
  #   or nil when the line carries no entry
  # @raise [NotImplementedError] unless overridden by a subclass
  def parse_line(line)
    raise NotImplementedError, 'You must implement parse_line.'
  end

  # Tests +line+ against the #category pattern and, when the captured name
  # is one of the requested keys, makes sure +result+ has a hash for it.
  #
  # @param line [String]
  # @param result [Hash] accumulator, mutated in place
  # @return [Hash, nil] the hash stored under the matched category, or nil
  #   when the line is not a category header or the category was not requested
  def match_category(line, result)
    category_match = category.match(line)
    return if category_match.nil?

    # Deliberately not called `category`: a local of that name would shadow
    # the #category method used just above.
    name = category_match[1]
    result[name] ||= {} if @keys.include?(name)
  end

  # Parses +filename+ (read as UTF-8) and collects the entries of every
  # requested category.
  #
  # @param filename [String] path of the file to read
  # @return [Hash] category name => { lang => translation }
  def parse_file(filename)
    result = {}
    current_string = nil
    # Block form, so the file handle is closed even if parsing raises.
    File.open(filename, 'r:UTF-8') do |file|
      file.each_line do |line|
        line.strip!
        next if should_exclude_line?(line)

        # If line is empty -> next category block started
        if line.empty?
          current_string = nil
          next
        end

        # Until a requested category header has been seen in this block,
        # every line is tried as a potential header.
        current_string ||= match_category(line, result)
        parsed = parse_line(line)
        next if parsed.nil? || current_string.nil?

        lang, translation = parsed
        current_string[lang] = translation
      end
    end
    result
  end

  # @return [Regexp] pattern whose first capture group is the category name
  # @raise [NotImplementedError] unless overridden by a subclass
  def category
    raise NotImplementedError, 'You must implement category.'
  end

  # Hook for skipping lines (checked before the blank-line test, so a
  # skipped line never ends a block). Nothing is skipped by default.
  #
  # @param line [String] the stripped line
  # @return [Boolean]
  def should_exclude_line?(line)
    false
  end
end
# Parser for category files made of "@name" headers followed by
# "lang:synonym|synonym|..." lines; lines starting with '#' are skipped.
class CategoriesParser < AbstractParser
  # Splits a "lang:translation" line into the language code and its
  # '|'-separated synonyms.
  #
  # @param line [String] a stripped line
  # @return [Array, nil] [lang, synonyms], or nil when the line has no
  #   "lang:value" shape (the value must not contain whitespace) or the
  #   language is not listed in LANGUAGES
  def parse_line(line)
    line_match = /^([^:]+):(\S+)$/u.match(line)
    return unless line_match

    lang = line_match[1].strip
    return unless LANGUAGES.include?(lang)

    tokens = line_match[2].strip.split('|')
    synonyms = tokens.each_with_object([]) do |token, collected|
      # Drop an optional leading digit and an optional '^' marker,
      # keeping the remainder of the token.
      token_match = /\d?\^?(.*)$/.match(token)
      collected << token_match[1] if token_match
    end
    [lang, synonyms]
  end

  # Lines starting with '#' are excluded from parsing.
  def should_exclude_line?(line)
    line.start_with?('#')
  end

  # Header pattern; the capture group is the category name.
  def category
    # We match only global categories ('food', 'bank'...)
    /^@([A-Za-z0-9]+)$/
  end
end
# Parser for strings files made of "[key]" headers followed by
# "lang = translation" lines.
class StringsParser < AbstractParser
  # Splits a "lang = translation" line at its first '='.
  #
  # @param line [String] a stripped line
  # @return [Array, nil] [lang, translation] with both parts stripped, or
  #   nil when the line has no '=' or the language is not listed in LANGUAGES
  def parse_line(line)
    line_match = /^([^=]+)=(.*)$/.match(line)
    return unless line_match

    lang = line_match[1].strip
    return unless LANGUAGES.include?(lang)

    [lang, line_match[2].strip]
  end

  # Header pattern; the capture group is the text between the brackets.
  def category
    /^\[(.+)\]/
  end
end
end