def wait_and_raise_if_fail(p):
    """Block until process ``p`` exits and raise if its exit status is not OK.

    Two kinds of process objects are supported:

    * ``subprocess.Popen`` (or a subclass): up to 256 characters/bytes are
      read from each of ``p.stdout`` / ``p.stderr`` that is not ``None`` and
      are quoted in the error message.
    * Any other object providing ``wait()``, ``args`` and the attributes
      ``output`` / ``error`` whose ``name`` is reported as the log location
      (presumably the project's own process wrapper -- confirm against callers).

    Args:
        p: the process-like object to wait for.

    Raises:
        BadExitStatusError: if ``p.wait()`` returns anything but ``os.EX_OK``.
    """
    # NOTE(review): the subprocess docs warn that Popen.wait() can deadlock
    # when stdout/stderr are pipes and the child fills the pipe buffer;
    # callers that pipe large outputs should prefer communicate().
    if p.wait() == os.EX_OK:
        return

    # Work on a copy: the error path must not mutate the caller's `p.args`.
    # Popen also accepts a single string/path instead of a sequence.
    command = p.args
    if isinstance(command, (str, bytes, os.PathLike)):
        command = [command]
    argv = [
        os.fsdecode(a) if isinstance(a, (bytes, os.PathLike)) else str(a)
        for a in command
    ]
    program = argv[0] if argv else "<unknown>"
    arguments = " ".join(argv[1:])

    if isinstance(p, subprocess.Popen):
        head_size = 256  # Only quote the beginning of each stream.
        captured = []
        for stream in (p.stdout, p.stderr):
            if stream is None:  # The stream was not redirected to a pipe.
                continue
            chunk = stream.read(head_size)
            if isinstance(chunk, bytes):
                # A multi-byte character may be cut at the size limit.
                chunk = chunk.decode(errors="replace")
            if chunk and chunk not in captured:
                captured.append(chunk)
        if captured:
            details = "Output: " + " and ".join(captured)
        else:
            details = "No output was captured."
    else:
        log_names = [p.output.name]
        if p.error.name != log_names[0]:
            log_names.append(p.error.name)
        details = "See details in " + " and ".join(log_names)

    msg = f"The launch of {program} failed.\nArguments used: {arguments}\n{details}"
    raise BadExitStatusError(msg)
make_test_booking_data(max_days): def test_booking_data(env: Env, logger, *args, **kwargs): + if not settings.HOTELS_URL: + return None base_url, _ = settings.HOTELS_URL.rsplit("/", maxsplit=1) url = f"{base_url}/meta.json" meta_path = os.path.join(env.paths.tmp_dir(), "hotels-meta.json") diff --git a/tools/python/maps_generator/var/etc/map_generator.ini.prod b/tools/python/maps_generator/var/etc/map_generator.ini.prod new file mode 100644 index 000000000..cbb02d1be --- /dev/null +++ b/tools/python/maps_generator/var/etc/map_generator.ini.prod @@ -0,0 +1,109 @@ +[Developer] +# Path to the `organicmaps` source code repository: +OMIM_PATH: ~/OM/organicmaps +# A path with the generator_tool binary: +BUILD_PATH: /root/OM/omim-build-relwithdebinfo +#${Developer:OMIM_PATH}/../omim-build-release + + +[Main] +# A special small planet file will be downloaded if DEBUG is set to 1. +DEBUG: 0 +# A main working directory. There is a subdirectory created for each generator run +# which contains the planet and other downloads, temporary build files, logs and completed MWMs. +MAIN_OUT_PATH: ${Developer:OMIM_PATH}/../maps_build +# Path for storing caches for nodes, ways, relations. +# If it's not set then caches are stored inside the directory of the current build. +# CACHE_PATH: ${Main:MAIN_OUT_PATH}/cache + + +[Generator tool] +# Path to the data/ folder in the repository: +USER_RESOURCE_PATH: ${Developer:OMIM_PATH}/data +# Features stage only parallelism level. Set to 0 for auto detection. +THREADS_COUNT_FEATURES_STAGE: 16 +# Do not change it. This is determined automatically. +NODE_STORAGE: mem + + +[Osm tools] +# Path to osmctools binaries: +OSM_TOOLS_PATH: /usr/bin/ +#${Developer:OMIM_PATH}/../osmctools +# If the binaries are not found neither in the configured path nor system-wide, +# then the tools are built from the sources: +OSM_TOOLS_SRC_PATH: ${Developer:OMIM_PATH}/tools/osmctools + +[Logging] +# maps_generator's general (python output only) log file path and name. 
+# More detailed logs that include output of the `generator_tool` binary +# are located in the `logs/` subdir of a particular build directory, +# e.g. `maps_build/2023_06_04__20_05_07/logs/`. +LOG_FILE_PATH: ${Main:MAIN_OUT_PATH}/generation.log + + +[External] +# Planet file location. It should be a dump of OSM data in osm.pbf format. +# By default it's an entire planet from "planet.openstreetmap.org". +# Or set it to a particular country/region extract from e.g. [Geofabrik](http://download.geofabrik.de/index.html). +# Note that an entire planet generation takes 40+ hours on a 256GB RAM server (and 1TB+ disk space). +# Stick to smaller extracts unless you have a machine this large. +# Here and further, it's possible to specify either a URL (to be downloaded automatically) +# or a local file path like file:///path/to/file. +# This production config reads a local planet file in o5m format: +PLANET_URL: file:///home/planet/planet/planet.o5m +# Location of the md5 checksum of the planet file: +PLANET_MD5_URL: ${External:PLANET_URL}.md5 +# A base url to the latest_coasts.geom and latest_coasts.rawgeom files. +# For example, if PLANET_COASTS_URL = https://somesite.com/download/ +# then the https://somesite.com/download/latest_coasts.geom url will be used to download latest_coasts.geom and +# the https://somesite.com/download/latest_coasts.rawgeom url will be used to download latest_coasts.rawgeom. +# Comment out to skip getting the coastlines files. +PLANET_COASTS_URL: file:///home/planet/ +# Should be 'true' for an entire planet build to make a special routing section in World.mwm +# for alerting about absent regions without which the route can't be built. +# NEED_BUILD_WORLD_ROADS: true +# Subway file location, see docs/SUBWAY_GENERATION.md if you want to generate your own file. +# Comment out to disable subway layer generation. 
+ +SUBWAY_URL: file:///home/planet/subway/subway.transit.json + +# Location of the EXPERIMENTAL GTFS-extracted public transport transit files: +# TRANSIT_URL: + +# Urls for production maps generation. +# UGC_URL: +# HOTELS_URL: +# PROMO_CATALOG_CITIES: +# POPULARITY_URL: +# FOOD_URL: +# FOOD_TRANSLATIONS_URL: + +SRTM_PATH: /home/planet/SRTM-patched-europe/ +ISOLINES_PATH: /home/planet/isolines/ +ADDRESSES_PATH: /home/planet/tiger/ + +# Local path (not url!) to .csv files. +UK_POSTCODES_URL: /home/planet/postcodes/gb-postcode-data/gb_postcodes.csv +US_POSTCODES_URL: /home/planet/postcodes/us-postcodes/uszips.csv + + +[Stages] +# Set to 1 to update the entire OSM planet file (as taken from "planet.openstreetmap.org") +# via an osmupdate tool before the generation. Not for use with partial planet extracts. +NEED_PLANET_UPDATE: 0 +# If you want to calculate diffs you need to specify where the old maps are, +# e.g. ${Main:MAIN_OUT_PATH}/2021_03_16__09_00_00/ +DATA_ARCHIVE_DIR: ${Generator tool:USER_RESOURCE_PATH} +# How many versions in the archive to use for diff calculation: +DIFF_VERSION_DEPTH: 2 + + +[Common] +# Default parallelism level for the most of jobs. Set to 0 for auto detection. 
+THREADS_COUNT: 0 + + +[Stats] +# Path to rules for calculating statistics by type: +STATS_TYPES_CONFIG: ${Developer:OMIM_PATH}/tools/python/maps_generator/var/etc/stats_types_config.txt diff --git a/tools/unix/Dockerfile b/tools/unix/Dockerfile new file mode 100755 index 000000000..d07d34048 --- /dev/null +++ b/tools/unix/Dockerfile @@ -0,0 +1,54 @@ +# See run-docker.sh for cloning, building, and running the maps generator Docker routine +FROM ubuntu:noble + +ARG DEBIAN_FRONTEND=noninteractive +ENV TZ=Etc/UTC + +RUN apt-get update -qq \ + && apt-get install -y --no-install-recommends \ + curl \ + osmctools \ + s3cmd \ + sshpass \ + vim \ + wget \ + build-essential \ + clang \ + cmake \ + ninja-build \ + python3 \ + python3-pip \ + python3.12-venv \ + qt6-base-dev \ + qt6-positioning-dev \ + libc++-dev \ + libfreetype-dev \ + libglvnd-dev \ + libgl1-mesa-dev \ + libharfbuzz-dev \ + libicu-dev \ + libqt6svg6-dev \ + libqt6positioning6-plugins \ + libqt6positioning6 \ + libsqlite3-dev \ + libxrandr-dev \ + libxinerama-dev \ + libxcursor-dev \ + libxi-dev \ + zlib1g-dev \ + && rm -rf /var/cache/apt/* /var/lib/apt/lists/*; + +RUN mkdir /root/OM +# When running, bind mount the repo here +RUN mkdir /root/OM/organicmaps +# And a volume to store the large output/temp files here +RUN mkdir /root/OM/maps_build +# And a volume to store >10gb files for the planet here +RUN mkdir /home/planet + +WORKDIR /root/OM/organicmaps + +# For debugging +#CMD /bin/bash + +CMD /root/OM/organicmaps/tools/unix/docker_maps_generator.sh diff --git a/tools/unix/docker_maps_generator.sh b/tools/unix/docker_maps_generator.sh new file mode 100755 index 000000000..6a6269963 --- /dev/null +++ b/tools/unix/docker_maps_generator.sh @@ -0,0 +1,138 @@ +#!/usr/bin/env bash + +#Volumes/paths for downloads: +#home/planet/planet/planet.o5m +#home/planet/planet/planet.o5m.md5 +#PLANET_COASTS_URL:file:///home/planet/planet/ + #home/planet/planet/latest_coasts.geom and latest_coasts.rawgeom +#SUBWAY_URL: 
file:///home/planet/subway/beta.json + #home/planet/subway/beta.json +#HOTELS_URL:/home/planet/planet/kayak/ + #home/planet/planet/kayak/ +#SRTM_PATH:/home/planet/SRTM-patched-europe/ +#ISOLINES_PATH:/home/planet/planet/isolines/ +#ADDRESSES_PATH:/home/planet/planet/tiger/ +#UK_POSTCODES_URL:/home/planet/postcodes/gb-postcode-data/gb_postcodes.csv +#US_POSTCODES_URL:/home/planet/postcodes/us-postcodes/uszips.csv + +echo "<$(date +%T)> Starting..." + +# Prepare paths +# +# Already created by Dockerfile: +# /root/OM +# /root/OM/organicmaps +# /root/OM/maps_build +# /home/planet +# +mkdir -p /root/.config/OMaps # Odd mkdir permission errors in generator_tool in Docker without these +chmod -R 777 /root/.config +mkdir -p ~/OM/maps_build +mkdir -p ~/OM/omim-build-release +mkdir -p ~/OM/osmctools +mkdir -p /home/planet/planet/isolines/ +mkdir -p /home/planet/planet/kayak/ +mkdir -p /home/planet/planet/tiger/ +mkdir -p /home/planet/postcodes/gb-postcode-data/ +mkdir -p /home/planet/postcodes/us-postcodes/ +mkdir -p /home/planet/SRTM-patched-europe/ +mkdir -p /home/planet/subway + +echo "Writing S3 config..." +echo "[default]" > ~/.s3cfg +echo "access_key = $S3_KEY_ID" >> ~/.s3cfg +echo "secret_key = $S3_SECRET_KEY" >> ~/.s3cfg +echo "host_base = $S3_HOST_BASE" >> ~/.s3cfg +echo "host_bucket = \$(bucket)s.$S3_HOST_BASE" >> ~/.s3cfg +# S3_BUCKET is used during upload + +echo "Wrote:" +cat ~/.s3cfg + +echo "<$(date +%T)> Running ./configure.sh ..." +cd ~/OM/organicmaps +./configure.sh + +echo "<$(date +%T)> Compiling tools..." +cd ~/OM/organicmaps +./tools/unix/build_omim.sh -R generator_tool +./tools/unix/build_omim.sh -R world_roads_builder_tool +./tools/unix/build_omim.sh -R mwm_diff_tool +cd tools/python/maps_generator +python3 -m venv /tmp/venv +/tmp/venv/bin/pip3 install -r requirements_dev.txt + +echo "<$(date +%T)> Copying map generator INI..." 
+cp var/etc/map_generator.ini.prod var/etc/map_generator.ini + +#TODO: may be duplicated by maps_generator at "osmctools are not found, building from the sources" +#echo "<$(date +%T)> Prebuild some tools so we can make an o5m file or run update_planet..." +#cd ~/OM/organicmaps/tools/osmctools +#gcc osmupdate.c -l z -o ~/OM/osmctools/osmupdate +#gcc osmconvert.c -l z -o ~/OM/osmctools/osmconvert + +# May be unnecessary when running world +# /tmp/venv/bin/python -m maps_generator --coasts +# save to /path/to/coasts WorldCoasts.geom as latest_coasts.geom and WorldCoasts.rawgeom latest_coasts.rawgeom +# (from https://github.com/mapsme/omim/issues/11994) + +cd /home/planet/planet +if [ ! -f planet-latest.osm.pbf ]; then + echo "<$(date +%T)> Downloading planet-latest.osm.pbf..." + curl -OL https://ftpmirror.your.org/pub/openstreetmap/pbf/planet-latest.osm.pbf + echo "<$(date +%T)> Downloading planet-latest.osm.pbf.md5..." + curl -OL https://ftpmirror.your.org/pub/openstreetmap/pbf/planet-latest.osm.pbf.md5 +else + echo "<$(date +%T)> planet-latest.osm.pbf exists, not downloading..." +fi + +#curl -OL https://download.geofabrik.de/north-america/us-west-latest.osm.pbf +#curl -OL https://download.geofabrik.de/north-america/us-west-latest.osm.pbf.md5 +# (rename us-west-latest to planet-latest and edit the md5 file accordingly) +if [ ! -f planet.o5m ]; then + echo "<$(date +%T)> Converting planet-latest.osm.pbf to planet.o5m..." + ~/OM/osmctools/osmconvert planet-latest.osm.pbf -o=planet.o5m +else + echo "<$(date +%T)> planet.o5m exists, not converting..." +fi +# (currently unused:) ~/OM/organicmaps/tools/unix/update_planet.sh planet.o5m + + +echo "<$(date +%T)> Generating maps..." 
+cd ~/OM/organicmaps/tools/python +/tmp/venv/bin/python -m maps_generator --skip="MwmDiffs" +#/tmp/venv/bin/python -m maps_generator --skip="MwmDiffs" --continue + +# do not use --production except for Kayak/recommendation/popularity/food data +#/tmp/venv/bin/python -m maps_generator --countries="World, WorldCoasts, US_Oregon_*, US_California_*, US_Washington_*" --production +#/tmp/venv/bin/python -m maps_generator --countries="US_Oregon_Portland" --skip="MwmDiffs" +#/tmp/venv/bin/python -m maps_generator --countries="Macedonia" --skip="MwmDiffs" + +shopt -s nullglob +mwmfiles=( ~/OM/maps_build/*/*/*.mwm ) + +if (( ${#mwmfiles[@]} )); then + echo "<$(date +%T)> Uploading maps..." + # TODO: upload limited files via SFTP to Dreamhost (cdn-us-1.comaps.app) + # Needs StrictHostKeyChecking=no otherwise new containers/SFTP_HOSTs will require a manual ssh attempt + #sshpass -p $SFTP_PASSWORD sftp -o StrictHostKeyChecking=no $SFTP_USER@$SFTP_HOST:$SFTP_PATH < No MWM files, not uploading maps." +fi + +echo "<$(date +%T)> Temporarily NOT Removing intermediate data..." +#rm -rf ~/OM/maps_build/*/intermediate_data +# rm -rf ~/OM/ + +echo "<$(date +%T)> DONE" + diff --git a/tools/unix/maps/generate_subways.sh b/tools/unix/maps/generate_subways.sh index 867210368..3808870fe 100755 --- a/tools/unix/maps/generate_subways.sh +++ b/tools/unix/maps/generate_subways.sh @@ -20,9 +20,9 @@ export SKIP_PLANET_UPDATE="1" # If unavailable then replace with a local file. # TODO: keep the downloaded csv file from the latest run. 
#export CITIES_INFO_URL="" -export TMPDIR="$BUILD_PATH/subways" +export TMPDIR="$BUILD_PATH/subway" # The output file, which needs post-processing by transit_graph_generator.py -export MAPSME="$SUBWAYS_PATH/subways.json" +export MAPSME="$SUBWAYS_PATH/subway.json" # Produce additional files needed for https://cdn.organicmaps.app/subway/ export HTML_DIR="$SUBWAYS_VALIDATOR_PATH" @@ -36,7 +36,7 @@ export DUMP_CITY_LIST="$SUBWAYS_VALIDATOR_PATH/cities.txt" cp -r "$SUBWAYS_REPO_PATH"/render/* "$SUBWAYS_VALIDATOR_PATH/" TRANSIT_TOOL_PATH="$REPO_PATH/tools/python/transit" -SUBWAYS_GRAPH_FILE="$SUBWAYS_PATH/subways.transit.json" +SUBWAYS_GRAPH_FILE="$SUBWAYS_PATH/subway.transit.json" activate_venv_at_path "$TRANSIT_TOOL_PATH" "$PYTHON" "$TRANSIT_TOOL_PATH/transit_graph_generator.py" "$MAPSME" "$SUBWAYS_GRAPH_FILE" 2>&1 | tee -a "$SUBWAYS_LOG" diff --git a/tools/unix/maps/settings_default.sh b/tools/unix/maps/settings_default.sh index 95cf37974..74f90139f 100644 --- a/tools/unix/maps/settings_default.sh +++ b/tools/unix/maps/settings_default.sh @@ -20,6 +20,6 @@ PLANET_O5M="${PLANET_O5M:-$PLANET_PATH/planet-latest.o5m}" # Subways SUBWAYS_REPO_PATH="${SUBWAYS_REPO_PATH:-$CODE_PATH/subways}" -SUBWAYS_PATH="${SUBWAYS_PATH:-$DATA_PATH/subways}" -SUBWAYS_LOG="${SUBWAYS_LOG:-$SUBWAYS_PATH/subways.log}" +SUBWAYS_PATH="${SUBWAYS_PATH:-$DATA_PATH/subway}" +SUBWAYS_LOG="${SUBWAYS_LOG:-$SUBWAYS_PATH/subway.log}" SUBWAYS_VALIDATOR_PATH="${SUBWAYS_VALIDATOR_PATH:-$SUBWAYS_PATH/validator}" diff --git a/tools/unix/update_planet.sh b/tools/unix/update_planet.sh new file mode 100755 index 000000000..33c6a0eb4 --- /dev/null +++ b/tools/unix/update_planet.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +set -euxo pipefail + +OSMUPDATE=~/osmctools/osmupdate +# osmconvert should be accessible in PATH. 
+PATH="$(dirname "$OSMUPDATE"):$PATH" + +# Pass pbf or o5m file as a parameter +OLD="$1" +NEW="${1/.pbf/.new.pbf}" +NEW="${NEW/.o5m/.new.o5m}" + +"$OSMUPDATE" -v --drop-authors --drop-version --hash-memory=512000 "$OLD" "$NEW" +# Replace the old planet with the updated one. +mv "$NEW" "$OLD" +#md5sum -b "$OLD" > "$OLD.md5" +echo "Successfully updated $OLD"