Add files to automatically generate maps with Docker

Signed-off-by: zyphlar <zyphlar@gmail.com>

Authored by zyphlar on 2025-05-01 22:47:34 -07:00, committed by Konstantin Pastbin
Parent: 3f7815017e
Commit: cc698dba49

9 changed files with 348 additions and 12 deletions

.gitignore (vendored)

@@ -9,6 +9,7 @@ Makefile.Release
 object_script.*.Debug
 object_script.*.Release
 compile_commands.json
+*.local.*
 stxxl.errlog
 stxxl.log


@@ -1,5 +1,5 @@
 import os
+import subprocess

 class MapsGeneratorError(Exception):
     pass
@@ -35,6 +35,21 @@ class FailedTest(MapsGeneratorError):
 def wait_and_raise_if_fail(p):
     if p.wait() != os.EX_OK:
+        if type(p) is subprocess.Popen:
+            args = p.args
+            stdout = p.stdout
+            stderr = p.stderr
+            logs = None
+            errors = None
+            if type(stdout) is not type(None):
+                logs = stdout.read(256).decode()
+            if type(stderr) is not type(None):
+                errors = stderr.read(256).decode()
+            if errors != logs:
+                logs += " and " + errors
+            msg = f"The launch of {args.pop(0)} failed.\nArguments used: {' '.join(args)}\nSee details in {logs}"
+            raise BadExitStatusError(msg)
+        else:
+            args = p.args
+            logs = p.output.name
+            if p.error.name != logs:


@@ -9,6 +9,8 @@ from maps_generator.utils.file import download_file
 def make_test_booking_data(max_days):
     def test_booking_data(env: Env, logger, *args, **kwargs):
+        if not settings.HOTELS_URL:
+            return None
         base_url, _ = settings.HOTELS_URL.rsplit("/", maxsplit=1)
         url = f"{base_url}/meta.json"
         meta_path = os.path.join(env.paths.tmp_dir(), "hotels-meta.json")


@@ -0,0 +1,109 @@
[Developer]
# Path to the `organicmaps` source code repository:
OMIM_PATH: ~/OM/organicmaps
# A path with the generator_tool binary:
BUILD_PATH: /root/OM/omim-build-relwithdebinfo
#${Developer:OMIM_PATH}/../omim-build-release
[Main]
# A special small planet file will be downloaded if DEBUG is set to 1.
DEBUG: 0
# The main working directory. A subdirectory is created for each generator run,
# containing the planet and other downloads, temporary build files, logs, and completed MWMs.
MAIN_OUT_PATH: ${Developer:OMIM_PATH}/../maps_build
# Path for storing caches for nodes, ways, relations.
# If it's not set then caches are stored inside the directory of the current build.
# CACHE_PATH: ${Main:MAIN_OUT_PATH}/cache
[Generator tool]
# Path to the data/ folder in the repository:
USER_RESOURCE_PATH: ${Developer:OMIM_PATH}/data
# Parallelism level for the features stage only. Set to 0 for auto detection.
THREADS_COUNT_FEATURES_STAGE: 16
# Do not change it. This is determined automatically.
NODE_STORAGE: mem
[Osm tools]
# Path to osmctools binaries:
OSM_TOOLS_PATH: /usr/bin/
#${Developer:OMIM_PATH}/../osmctools
# If the binaries are found neither in the configured path nor system-wide,
# then the tools are built from the sources:
OSM_TOOLS_SRC_PATH: ${Developer:OMIM_PATH}/tools/osmctools
[Logging]
# maps_generator's general (python output only) log file path and name.
# More detailed logs that include output of the `generator_tool` binary
# are located in the `logs/` subdir of a particular build directory,
# e.g. `maps_build/2023_06_04__20_05_07/logs/`.
LOG_FILE_PATH: ${Main:MAIN_OUT_PATH}/generation.log
[External]
# Planet file location. It should be a dump of OSM data in osm.pbf format.
# By default it's an entire planet from "planet.openstreetmap.org".
# Or set it to a particular country/region extract from e.g. [Geofabrik](http://download.geofabrik.de/index.html).
# Note that an entire planet generation takes 40+ hours on a 256GB RAM server (and 1TB+ of disk space).
# Stick to smaller extracts unless you have a machine this large.
# Here and below, it's possible to specify either a URL (to be downloaded automatically)
# or a local file path like file:///path/to/file.
# A sample URL to download the latest OSM dump for North Macedonia:
PLANET_URL: file:///home/planet/planet/planet.o5m
# Location of the md5 checksum of the planet file:
PLANET_MD5_URL: ${External:PLANET_URL}.md5
# The base URL for the latest_coasts.geom and latest_coasts.rawgeom files.
# For example, if PLANET_COASTS_URL = https://somesite.com/download/
# then the https://somesite.com/download/latest_coasts.geom url will be used to download latest_coasts.geom and
# the https://somesite.com/download/latest_coasts.rawgeom url will be used to download latest_coasts.rawgeom.
# Comment out to skip downloading the coastline files.
PLANET_COASTS_URL: file:///home/planet/
# Should be 'true' for an entire planet build: it adds a special routing section to World.mwm
# used to warn about absent regions without which a route can't be built.
# NEED_BUILD_WORLD_ROADS: true
# Subway file location; see docs/SUBWAY_GENERATION.md if you want to generate your own file.
# Comment out to disable subway layer generation.
SUBWAY_URL: file:///home/planet/subway/subway.transit.json
# Location of the EXPERIMENTAL GTFS-extracted public transport transit files:
# TRANSIT_URL:
# URLs for production maps generation.
# UGC_URL:
# HOTELS_URL:
# PROMO_CATALOG_CITIES:
# POPULARITY_URL:
# FOOD_URL:
# FOOD_TRANSLATIONS_URL:
SRTM_PATH: /home/planet/SRTM-patched-europe/
ISOLINES_PATH: /home/planet/isolines/
ADDRESSES_PATH: /home/planet/tiger/
# Local paths (not URLs!) to .csv files.
UK_POSTCODES_URL: /home/planet/postcodes/gb-postcode-data/gb_postcodes.csv
US_POSTCODES_URL: /home/planet/postcodes/us-postcodes/uszips.csv
[Stages]
# Set to 1 to update the entire OSM planet file (as taken from "planet.openstreetmap.org")
# via the osmupdate tool before generation. Not for use with partial planet extracts.
NEED_PLANET_UPDATE: 0
# If you want to calculate diffs you need to specify where the old maps are,
# e.g. ${Main:MAIN_OUT_PATH}/2021_03_16__09_00_00/
DATA_ARCHIVE_DIR: ${Generator tool:USER_RESOURCE_PATH}
# How many versions in the archive to use for diff calculation:
DIFF_VERSION_DEPTH: 2
[Common]
# Default parallelism level for most jobs. Set to 0 for auto detection.
THREADS_COUNT: 0
[Stats]
# Path to rules for calculating statistics by type:
STATS_TYPES_CONFIG: ${Developer:OMIM_PATH}/tools/python/maps_generator/var/etc/stats_types_config.txt
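
For reference, a minimal manual run with this configuration might look like the sketch below; it mirrors the steps docker_maps_generator.sh (added later in this commit) performs inside the container. The small "Macedonia" extract and the assumption that generator_tool is already built are illustrative only, not part of this commit.

# From the organicmaps repository root, assuming generator_tool is already built:
cd tools/python/maps_generator
python3 -m venv /tmp/venv
/tmp/venv/bin/pip3 install -r requirements_dev.txt
cp var/etc/map_generator.ini.prod var/etc/map_generator.ini   # then adjust the paths/URLs above for your machine
cd ..
/tmp/venv/bin/python -m maps_generator --countries="Macedonia" --skip="MwmDiffs"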

tools/unix/Dockerfile (new executable file)

@@ -0,0 +1,54 @@
# See run-docker.sh for cloning, building, and running the maps generator Docker routine
FROM ubuntu:noble
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/UTC
RUN apt-get update -qq \
&& apt-get install -y --no-install-recommends \
curl \
osmctools \
s3cmd \
sshpass \
vim \
wget \
build-essential \
clang \
cmake \
ninja-build \
python3 \
python3-pip \
python3.12-venv \
qt6-base-dev \
qt6-positioning-dev \
libc++-dev \
libfreetype-dev \
libglvnd-dev \
libgl1-mesa-dev \
libharfbuzz-dev \
libicu-dev \
libqt6svg6-dev \
libqt6positioning6-plugins \
libqt6positioning6 \
libsqlite3-dev \
libxrandr-dev \
libxinerama-dev \
libxcursor-dev \
libxi-dev \
zlib1g-dev \
&& rm -rf /var/cache/apt/* /var/lib/apt/lists/*;
RUN mkdir /root/OM
# When running, bind mount the repo here
RUN mkdir /root/OM/organicmaps
# And a volume to store the large output/temp files here
RUN mkdir /root/OM/maps_build
# And a volume to store >10gb files for the planet here
RUN mkdir /home/planet
WORKDIR /root/OM/organicmaps
# For debugging
#CMD /bin/bash
CMD /root/OM/organicmaps/tools/unix/docker_maps_generator.sh
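
The header comment refers to run-docker.sh for the full clone/build/run routine, which is not included in this excerpt. As a rough sketch only — the image tag and named volumes below are assumptions, not defined by this commit — building and running the container with the mounts described in the comments above could look like:

# Build the image from the repository root (tag name is hypothetical).
docker build -t maps-generator -f tools/unix/Dockerfile .

# Run it with the repo bind-mounted, named volumes for build output and planet
# data, and the credentials that docker_maps_generator.sh reads from the environment.
docker run --rm \
    -v "$PWD":/root/OM/organicmaps \
    -v maps_build:/root/OM/maps_build \
    -v planet:/home/planet \
    -e S3_KEY_ID -e S3_SECRET_KEY -e S3_HOST_BASE -e S3_BUCKET \
    maps-generator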


@@ -0,0 +1,138 @@
#!/usr/bin/env bash
# Volumes/paths for downloads:
#   /home/planet/planet/planet.o5m
#   /home/planet/planet/planet.o5m.md5
#   PLANET_COASTS_URL: file:///home/planet/planet/
#     -> /home/planet/planet/latest_coasts.geom and latest_coasts.rawgeom
#   SUBWAY_URL: file:///home/planet/subway/beta.json
#     -> /home/planet/subway/beta.json
#   HOTELS_URL: /home/planet/planet/kayak/
#     -> /home/planet/planet/kayak/
#   SRTM_PATH: /home/planet/SRTM-patched-europe/
#   ISOLINES_PATH: /home/planet/planet/isolines/
#   ADDRESSES_PATH: /home/planet/planet/tiger/
#   UK_POSTCODES_URL: /home/planet/postcodes/gb-postcode-data/gb_postcodes.csv
#   US_POSTCODES_URL: /home/planet/postcodes/us-postcodes/uszips.csv
echo "<$(date +%T)> Starting..."
# Prepare paths
#
# Already created by Dockerfile:
# /root/OM
# /root/OM/organicmaps
# /root/OM/maps_build
# /home/planet
#
mkdir -p /root/.config/OMaps # Odd mkdir permission errors in generator_tool in Docker without these
chmod -R 777 /root/.config
mkdir -p ~/OM/maps_build
mkdir -p ~/OM/omim-build-release
mkdir -p ~/OM/osmctools
mkdir -p /home/planet/planet/isolines/
mkdir -p /home/planet/planet/kayak/
mkdir -p /home/planet/planet/tiger/
mkdir -p /home/planet/postcodes/gb-postcode-data/
mkdir -p /home/planet/postcodes/us-postcodes/
mkdir -p /home/planet/SRTM-patched-europe/
mkdir -p /home/planet/subway
echo "Writing S3 config..."
echo "[default]" > ~/.s3cfg
echo "access_key = $S3_KEY_ID" >> ~/.s3cfg
echo "secret_key = $S3_SECRET_KEY" >> ~/.s3cfg
echo "host_base = $S3_HOST_BASE" >> ~/.s3cfg
echo "host_bucket = \$(bucket)s.$S3_HOST_BASE" >> ~/.s3cfg
# S3_BUCKET is used during upload
echo "Wrote:"
cat ~/.s3cfg
echo "<$(date +%T)> Running ./configure.sh ..."
cd ~/OM/organicmaps
./configure.sh
echo "<$(date +%T)> Compiling tools..."
cd ~/OM/organicmaps
./tools/unix/build_omim.sh -R generator_tool
./tools/unix/build_omim.sh -R world_roads_builder_tool
./tools/unix/build_omim.sh -R mwm_diff_tool
cd tools/python/maps_generator
python3 -m venv /tmp/venv
/tmp/venv/bin/pip3 install -r requirements_dev.txt
echo "<$(date +%T)> Copying map generator INI..."
cp var/etc/map_generator.ini.prod var/etc/map_generator.ini
#TODO: may be duplicated by maps_generator at "osmctools are not found, building from the sources"
#echo "<$(date +%T)> Prebuild some tools so we can make an o5m file or run update_planet..."
#cd ~/OM/organicmaps/tools/osmctools
#gcc osmupdate.c -l z -o ~/OM/osmctools/osmupdate
#gcc osmconvert.c -l z -o ~/OM/osmctools/osmconvert
# May be unnecessary when running world
# /tmp/venv/bin/python -m maps_generator --coasts
# save WorldCoasts.geom as latest_coasts.geom and WorldCoasts.rawgeom as latest_coasts.rawgeom to /path/to/coasts
# (from https://github.com/mapsme/omim/issues/11994)
cd /home/planet/planet
if [ ! -f planet-latest.osm.pbf ]; then
echo "<$(date +%T)> Downloading planet-latest.osm.pbf..."
curl -OL https://ftpmirror.your.org/pub/openstreetmap/pbf/planet-latest.osm.pbf
echo "<$(date +%T)> Downloading planet-latest.osm.pbf.md5..."
curl -OL https://ftpmirror.your.org/pub/openstreetmap/pbf/planet-latest.osm.pbf.md5
else
echo "<$(date +%T)> planet-latest.osm.pbf exists, not downloading..."
fi
#curl -OL https://download.geofabrik.de/north-america/us-west-latest.osm.pbf
#curl -OL https://download.geofabrik.de/north-america/us-west-latest.osm.pbf.md5
# (rename us-west-latest to planet-latest and edit the md5 file accordingly)
if [ ! -f planet.o5m ]; then
echo "<$(date +%T)> Converting planet-latest.osm.pbf to planet.o5m..."
~/OM/osmctools/osmconvert planet-latest.osm.pbf -o=planet.o5m
else
echo "<$(date +%T)> planet.o5m exists, not converting..."
fi
# (currently unused:) ~/OM/organicmaps/tools/unix/update_planet.sh planet.o5m
echo "<$(date +%T)> Generating maps..."
cd ~/OM/organicmaps/tools/python
/tmp/venv/bin/python -m maps_generator --skip="MwmDiffs"
#/tmp/venv/bin/python -m maps_generator --skip="MwmDiffs" --continue
# do not use --production except for Kayak/recommendation/popularity/food data
#/tmp/venv/bin/python -m maps_generator --countries="World, WorldCoasts, US_Oregon_*, US_California_*, US_Washington_*" --production
#/tmp/venv/bin/python -m maps_generator --countries="US_Oregon_Portland" --skip="MwmDiffs"
#/tmp/venv/bin/python -m maps_generator --countries="Macedonia" --skip="MwmDiffs"
shopt -s nullglob
mwmfiles=( ~/OM/maps_build/*/*/*.mwm )
if (( ${#mwmfiles[@]} )); then
echo "<$(date +%T)> Uploading maps..."
# TODO: upload limited files via SFTP to Dreamhost (cdn-us-1.comaps.app)
# Needs StrictHostKeyChecking=no otherwise new containers/SFTP_HOSTs will require a manual ssh attempt
#sshpass -p $SFTP_PASSWORD sftp -o StrictHostKeyChecking=no $SFTP_USER@$SFTP_HOST:$SFTP_PATH <<EOF
#put ~/OM/maps_build/20*/2*/countries.txt
#put ~/OM/maps_build/20*/2*/World.mwm
#put ~/OM/maps_build/20*/2*/WorldCoasts.mwm
#exit
#EOF
# TODO: upload all files via rclone to Cloudflare (R2)
#s3cmd put ~/OM/maps_build/generation.log "s3://$S3_BUCKET/$(date +%y%m%d)/"
#s3cmd put ~/OM/maps_build/*/*/*.mwm "s3://$S3_BUCKET/$(date +%y%m%d)/" --recursive
#s3cmd put ~/OM/maps_build/*/logs "s3://$S3_BUCKET/$(date +%y%m%d)/" --recursive
else
echo "<$(date +%T)> No MWM files, not uploading maps."
fi
echo "<$(date +%T)> Temporarily NOT Removing intermediate data..."
#rm -rf ~/OM/maps_build/*/intermediate_data
# rm -rf ~/OM/
echo "<$(date +%T)> DONE"


@@ -20,9 +20,9 @@ export SKIP_PLANET_UPDATE="1"
 # If unavailable then replace with a local file.
 # TODO: keep the downloaded csv file from the latest run.
 #export CITIES_INFO_URL=""
-export TMPDIR="$BUILD_PATH/subways"
+export TMPDIR="$BUILD_PATH/subway"
 # The output file, which needs post-processing by transit_graph_generator.py
-export MAPSME="$SUBWAYS_PATH/subways.json"
+export MAPSME="$SUBWAYS_PATH/subway.json"
 # Produce additional files needed for https://cdn.organicmaps.app/subway/
 export HTML_DIR="$SUBWAYS_VALIDATOR_PATH"
@@ -36,7 +36,7 @@ export DUMP_CITY_LIST="$SUBWAYS_VALIDATOR_PATH/cities.txt"
cp -r "$SUBWAYS_REPO_PATH"/render/* "$SUBWAYS_VALIDATOR_PATH/" cp -r "$SUBWAYS_REPO_PATH"/render/* "$SUBWAYS_VALIDATOR_PATH/"
TRANSIT_TOOL_PATH="$REPO_PATH/tools/python/transit" TRANSIT_TOOL_PATH="$REPO_PATH/tools/python/transit"
SUBWAYS_GRAPH_FILE="$SUBWAYS_PATH/subways.transit.json" SUBWAYS_GRAPH_FILE="$SUBWAYS_PATH/subway.transit.json"
activate_venv_at_path "$TRANSIT_TOOL_PATH" activate_venv_at_path "$TRANSIT_TOOL_PATH"
"$PYTHON" "$TRANSIT_TOOL_PATH/transit_graph_generator.py" "$MAPSME" "$SUBWAYS_GRAPH_FILE" 2>&1 | tee -a "$SUBWAYS_LOG" "$PYTHON" "$TRANSIT_TOOL_PATH/transit_graph_generator.py" "$MAPSME" "$SUBWAYS_GRAPH_FILE" 2>&1 | tee -a "$SUBWAYS_LOG"


@@ -20,6 +20,6 @@ PLANET_O5M="${PLANET_O5M:-$PLANET_PATH/planet-latest.o5m}"
 # Subways
 SUBWAYS_REPO_PATH="${SUBWAYS_REPO_PATH:-$CODE_PATH/subways}"
-SUBWAYS_PATH="${SUBWAYS_PATH:-$DATA_PATH/subways}"
-SUBWAYS_LOG="${SUBWAYS_LOG:-$SUBWAYS_PATH/subways.log}"
+SUBWAYS_PATH="${SUBWAYS_PATH:-$DATA_PATH/subway}"
+SUBWAYS_LOG="${SUBWAYS_LOG:-$SUBWAYS_PATH/subway.log}"
 SUBWAYS_VALIDATOR_PATH="${SUBWAYS_VALIDATOR_PATH:-$SUBWAYS_PATH/validator}"

tools/unix/update_planet.sh (new executable file)

@@ -0,0 +1,17 @@
#!/usr/bin/env bash
set -euxo pipefail
OSMUPDATE=~/osmctools/osmupdate
# osmconvert should be accessible in PATH.
PATH="$(dirname "$OSMUPDATE"):$PATH"
# Pass a .pbf or .o5m file as a parameter.
OLD="$1"
NEW="${1/.pbf/.new.pbf}"
NEW="${NEW/.o5m/.new.o5m}"
"$OSMUPDATE" -v --drop-authors --drop-version --hash-memory=512000 "$OLD" "$NEW"
# Replace the old planet with the updated file.
mv "$NEW" "$OLD"
#md5sum -b "$OLD" > "$OLD.md5"
echo "Successfully updated $OLD"