[generator] Automate maps generation with Docker and CI/CD

Co-authored-by: Konstantin Pastbin <konstantin.pastbin@gmail.com>
Signed-off-by: zyphlar <zyphlar@gmail.com>
This commit is contained in:
zyphlar
2025-05-01 22:47:34 -07:00
committed by zyphlar
parent 7852cdb5a5
commit f16d14e07f
13 changed files with 763 additions and 251 deletions

View File

@@ -1,5 +1,5 @@
import os
import subprocess
class MapsGeneratorError(Exception):
pass
@@ -35,9 +35,24 @@ class FailedTest(MapsGeneratorError):
def wait_and_raise_if_fail(p):
if p.wait() != os.EX_OK:
args = p.args
logs = p.output.name
if p.error.name != logs:
logs += " and " + p.error.name
msg = f"The launch of {args.pop(0)} failed.\nArguments used: {' '.join(args)}\nSee details in {logs}"
raise BadExitStatusError(msg)
if type(p) is subprocess.Popen:
args = p.args
stdout = p.stdout
stderr = p.stderr
logs = None
errors = None
if type(stdout) is not type(None):
logs = stdout.read(256).decode()
if type(stderr) is not type(None):
errors = stderr.read(256).decode()
if errors != logs:
logs += " and " + errors
msg = f"The launch of {args.pop(0)} failed.\nArguments used: {' '.join(args)}\nSee details in {logs}"
raise BadExitStatusError(msg)
else:
args = p.args
logs = p.output.name
if p.error.name != logs:
logs += " and " + p.error.name
msg = f"The launch of {args.pop(0)} failed.\nArguments used: {' '.join(args)}\nSee details in {logs}"
raise BadExitStatusError(msg)

View File

@@ -11,6 +11,7 @@ import multiprocessing
import os
import shutil
import tarfile
import errno
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import AnyStr
@@ -168,8 +169,18 @@ class StageDownloadDescriptions(Stage):
)
"""
src = "/home/planet/descriptions"
# The src folder is hardcoded here and must be implemented on the map building machine
src = "/home/planet/wikipedia/descriptions"
# The dest folder will generally become build/*/intermediate_data/descriptions
dest = env.paths.descriptions_path
# An empty source folder is a big problem
try:
if os.path.isdir(src):
print("Found %s" % (src))
else:
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), src)
except OSError as e:
print("rmtree error: %s - %s" % (e.filename, e.strerror))
# Empty folder "descriptions" can be already created.
try:
if os.path.isdir(dest):

View File

@@ -9,6 +9,8 @@ from maps_generator.utils.file import download_file
def make_test_booking_data(max_days):
def test_booking_data(env: Env, logger, *args, **kwargs):
if not settings.HOTELS_URL:
return None
base_url, _ = settings.HOTELS_URL.rsplit("/", maxsplit=1)
url = f"{base_url}/meta.json"
meta_path = os.path.join(env.paths.tmp_dir(), "hotels-meta.json")

View File

@@ -0,0 +1,107 @@
[Developer]
# Path to the `comaps` source code repository:
OMIM_PATH: ~/comaps
# A path with the generator_tool binary:
BUILD_PATH: ~/omim-build-relwithdebinfo
[Main]
# A special small planet file will be downloaded if DEBUG is set to 1.
DEBUG: 0
# A main working directory. There is a subdirectory created for each generator run
# which contains the planet and other downloads, temporary build files, logs and completed MWMs.
MAIN_OUT_PATH: /mnt/4tbexternal/osm-maps
# Path for storing caches for nodes, ways, relations.
# If it's not set then caches are stored inside the directory of the current build.
# CACHE_PATH: ${Main:MAIN_OUT_PATH}/cache
[Generator tool]
# Path to the data/ folder in the repository:
USER_RESOURCE_PATH: ${Developer:OMIM_PATH}/data
# Features stage only parallelism level. Set to 0 for auto detection.
THREADS_COUNT_FEATURES_STAGE: 0
# Do not change it. This is determined automatically.
NODE_STORAGE: mem
[Osm tools]
# Path to osmctools binaries:
OSM_TOOLS_PATH: /usr/bin/
# If the binaries are not found neither in the configured path nor system-wide,
# then the tools are built from the sources:
OSM_TOOLS_SRC_PATH: ${Developer:OMIM_PATH}/tools/osmctools
[Logging]
# maps_generator's general (python output only) log file path and name.
# More detailed logs that include output of the `generator_tool` binary
# are located in the `logs/` subdir of a particular build directory,
# e.g. `maps_build/2023_06_04__20_05_07/logs/`.
LOG_FILE_PATH: ${Main:MAIN_OUT_PATH}/generation.log
[External]
# Planet file location. It should be a dump of OSM data in osm.pbf format.
# By default its an entire planet from "planet.openstreetmap.org".
# Or set it to a particular country/region extract from e.g. [Geofabrik](http://download.geofabrik.de/index.html).
# Note that an entire planet generation takes 40+ hours on a 256GB RAM server (and 1TB+ disk space).
# Stick to smaller extracts unless you have a machine this large.
# Here and further, its possible to specify either an URL (to be downloaded automatically)
# or a local file path like file:///path/to/file.
# A sample URL to download a latest OSM dump for North Macedonia:
PLANET_URL: file:///home/planet/planet/planet.o5m
# Location of the md5 checksum of the planet file:
PLANET_MD5_URL: ${External:PLANET_URL}.md5
# A base url to the latest_coasts.geom and latest_coasts.rawgeom files.
# For example, if PLANET_COASTS_URL = https://somesite.com/download/
# then the https://somesite.com/download/latest_coasts.geom url will be used to download latest_coasts.geom and
# the https://somesite.com/download/latest_coasts.rawgeom url will be used to download latest_coasts.rawgeom.
# Comment to skip getting the coastlines files.
PLANET_COASTS_URL: file:///home/planet/
# Should be 'true' for an entire planet build to make a special routing section in World.mwm
# for alerting about absent regions without which the route can't be built.
# NEED_BUILD_WORLD_ROADS: true
# Subway file location, see docs/SUBWAY_GENERATION.md if you want to generate your own file.
# Comment to disable subway layer generation.
SUBWAY_URL: file:///home/planet/subway/subways.transit.json
# Location of the EXPERIMENTAL GTFS-extracted public transport transit files:
# TRANSIT_URL:
# Urls for production maps generation.
# UGC_URL:
# HOTELS_URL:
# PROMO_CATALOG_CITIES:
# POPULARITY_URL:
# FOOD_URL:
# FOOD_TRANSLATIONS_URL:
SRTM_PATH: /home/planet/SRTM-patched-europe/
ISOLINES_PATH: /home/planet/isolines/
ADDRESSES_PATH: /home/planet/tiger/
# Local path (not url!) to .csv files.
UK_POSTCODES_URL: /home/planet/postcodes/gb-postcode-data/gb_postcodes.csv
US_POSTCODES_URL: /home/planet/postcodes/us-postcodes/uszips.csv
[Stages]
# Set to 1 to update the entire OSM planet file (as taken from "planet.openstreetmap.org")
# via an osmupdate tool before the generation. Not for use with partial planet extracts.
NEED_PLANET_UPDATE: 0
# If you want to calculate diffs you need to specify where the old maps are,
# e.g. ${Main:MAIN_OUT_PATH}/2021_03_16__09_00_00/
DATA_ARCHIVE_DIR: ${Generator tool:USER_RESOURCE_PATH}
# How many versions in the archive to use for diff calculation:
DIFF_VERSION_DEPTH: 2
[Common]
# Default parallelism level for the most of jobs. Set to 0 for auto detection.
THREADS_COUNT: 0
[Stats]
# Path to rules for calculating statistics by type:
STATS_TYPES_CONFIG: ${Developer:OMIM_PATH}/tools/python/maps_generator/var/etc/stats_types_config.txt