# Line-oriented parsers for block-structured translation files.
#
# Each parser reads a text file made of blocks separated by empty lines.
# A block starts with a header line (matched by the parser's `category`
# regexp) and is followed by per-language lines (decoded by `parse_line`).
module OmimParsers

  # To update the list, run in root directory:
  # sed -nEe "s/ +([a-zA-Z]{2}(-[a-zA-Z]{2,})?) = .*$/\1/p" "data/strings/strings.txt" | sort -u | tr '\n' ' ' | sed -e 's/,$//' | fold -s -w48; echo
  LANGUAGES = %w(af ar be bg ca cs da de el en en-GB es es-MX et eu fa fi fr fr-CA
                 he hi hu id it ja ko lt mr nb nl pl pt pt-BR ro ru sk sv sw th tr uk vi
                 zh-Hans zh-Hant)

  # Base class implementing the block-by-block file walk.
  #
  # Subclasses must implement `parse_line` and `category`; they may override
  # `should_exclude_line?` to skip lines (it excludes nothing by default).
  class AbstractParser
    # keys - collection of block names to collect; it only needs to respond
    #        to `include?`. Blocks whose name is not in `keys` are ignored.
    def initialize(keys)
      @keys = keys
    end

    # Decodes one stripped line.
    #
    # Returns a two-element array `[lang, translation]`, or nil when the line
    # carries no translation. Must be implemented by subclasses.
    def parse_line(line)
      raise NotImplementedError.new("You must implement parse_line.")
    end

    # Checks whether `line` is a block header for one of the requested keys.
    #
    # Returns the hash stored in `result` for that key (created on first use),
    # or nil when the line is not a header or names a key that was not
    # requested.
    def match_category(line, result)
      category_match = category.match(line)
      return nil if category_match.nil?

      # Named `name` (not `category`) so the local never shadows the
      # `category` method called above.
      name = category_match[1]
      return nil unless @keys.include?(name)

      result[name] ||= {}
    end

    # Parses `filename` (read as UTF-8).
    #
    # Returns a hash `{ key => { lang => translation } }` containing an entry
    # for every requested key whose header line was found in the file.
    def parse_file(filename)
      current_string = nil
      result = {}

      # Block form so the file handle is closed even if parsing raises.
      File.open(filename, 'r:UTF-8') do |file|
        file.each_line do |line|
          line.strip!
          next if should_exclude_line? line

          # If line is empty -> next category block started
          if line.empty?
            current_string = nil
            next
          end

          # Until a requested header has been seen in this block, keep testing
          # each line against the header pattern.
          current_string ||= match_category(line, result)

          parsed = parse_line(line)
          next if parsed.nil? || current_string.nil?

          lang, translation = parsed
          current_string[lang] = translation
        end
      end

      result
    end

    # Returns the Regexp that recognizes a block header; its first capture
    # group is the block name. Must be implemented by subclasses.
    def category
      raise NotImplementedError.new("You must implement category.")
    end

    # Returns true when `line` must be skipped entirely (before the
    # empty-line and header checks). The base implementation skips nothing.
    def should_exclude_line?(line)
      false
    end
  end

  # Parser for files whose blocks start with an `@name` header and contain
  # `lang:token|token|...` lines; lines starting with '#' are skipped.
  class CategoriesParser < AbstractParser
    # Decodes a `lang:tokens` line (the value part must not contain
    # whitespace).
    #
    # Returns `[lang, synonyms]`, where `synonyms` is the list of
    # '|'-separated tokens, each with an optional leading digit and an
    # optional '^' marker removed. Returns nil when the line does not have
    # that shape or `lang` is not listed in LANGUAGES.
    def parse_line(line)
      line_match = /^([^:]+):(\S+)$/u.match(line)
      return if !line_match

      lang = line_match[1].strip
      return if !LANGUAGES.include? lang

      translation = line_match[2].strip
      synonyms = translation.split('|').map do |token|
        token_match = /\d?\^?(.*)$/.match(token)
        token_match[1] if token_match
      end.compact

      [lang, synonyms]
    end

    # Lines starting with '#' are skipped.
    def should_exclude_line?(line)
      line.start_with? '#'
    end

    def category
      # We match only global categories ('food', 'bank'...)
      /^@([A-Za-z0-9]+)$/
    end
  end

  # Parser for files whose blocks start with a `[name]` header and contain
  # `lang = translation` lines.
  class StringsParser < AbstractParser
    # Decodes a `lang = translation` line.
    #
    # Returns `[lang, translation]` with surrounding whitespace stripped from
    # both parts, or nil when the line has no '=' or `lang` is not listed in
    # LANGUAGES.
    def parse_line(line)
      line_match = /^([^=]+)=(.*)$/.match(line)
      return nil unless line_match

      lang = line_match[1].strip
      return nil unless LANGUAGES.include? lang

      [lang, line_match[2].strip]
    end

    # Header pattern: `[name]` at the start of the line.
    def category
      /^\[(.+)\]/
    end
  end
end