mirror of
https://codeberg.org/comaps/comaps
synced 2026-01-13 07:34:31 +00:00
Compare commits
9 Commits
zy-docker-
...
zy-pano-bu
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
47fe00c76e | ||
|
|
083a364d4a | ||
|
|
9a16e3f69c | ||
|
|
9e45e04d03 | ||
|
|
e9406c0f36 | ||
|
|
4d862b0a8b | ||
|
|
01cdc24512 | ||
|
|
7722cc7d46 | ||
|
|
5eeeaeb288 |
@@ -114,7 +114,11 @@ jobs:
|
||||
|
||||
- name: Fallback manual apt install
|
||||
shell: bash
|
||||
run: apt install -y $APT_PACKAGES
|
||||
run: |
|
||||
if ! command -v pip &> /dev/null; then
|
||||
echo "pip not found, cache action failed, installing packages manually"
|
||||
apt install -y $APT_PACKAGES
|
||||
fi
|
||||
|
||||
- name: Generate pip cache key
|
||||
id: pip-cache-key
|
||||
@@ -146,7 +150,7 @@ jobs:
|
||||
- name: Cache world map
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: data/world_mwm
|
||||
path: world_mwm
|
||||
key: world-mwm
|
||||
|
||||
- uses: actions/cache@v4
|
||||
@@ -218,7 +222,11 @@ jobs:
|
||||
|
||||
- name: Fallback manual apt install
|
||||
shell: bash
|
||||
run: apt install -y $APT_PACKAGES
|
||||
run: |
|
||||
if ! command -v pip &> /dev/null; then
|
||||
echo "pip not found, cache action failed, installing packages manually"
|
||||
apt install -y $APT_PACKAGES
|
||||
fi
|
||||
|
||||
- name: Generate pip cache key
|
||||
id: pip-cache-key
|
||||
@@ -250,7 +258,7 @@ jobs:
|
||||
- name: Cache world map
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: data/world_mwm
|
||||
path: world_mwm
|
||||
key: world-mwm
|
||||
|
||||
- uses: actions/cache@v4
|
||||
|
||||
@@ -17,12 +17,22 @@ on:
|
||||
required: false
|
||||
default: false
|
||||
type: boolean
|
||||
run-panoramax:
|
||||
description: 'Update Panoramax imagery?'
|
||||
required: false
|
||||
default: true
|
||||
type: boolean
|
||||
# run-cleanup:
|
||||
# description: 'Clean up old build files?'
|
||||
# required: false
|
||||
# default: true
|
||||
# type: boolean
|
||||
run-tiger:
|
||||
description: 'Update TIGER address data?'
|
||||
required: false
|
||||
default: true
|
||||
type: boolean
|
||||
run-planet-pbf:
|
||||
run-planet:
|
||||
description: 'Update PBF planet (for Wiki & subways)?'
|
||||
required: false
|
||||
default: true
|
||||
@@ -37,11 +47,11 @@ on:
|
||||
required: false
|
||||
default: true
|
||||
type: boolean
|
||||
run-planet-o5m:
|
||||
description: 'Update O5M planet (for mapgen)?'
|
||||
required: false
|
||||
default: true
|
||||
type: boolean
|
||||
# run-planet-o5m:
|
||||
# description: 'Update O5M planet (for mapgen)?'
|
||||
# required: false
|
||||
# default: true
|
||||
# type: boolean
|
||||
run-mapgen:
|
||||
description: 'Run maps generation?'
|
||||
required: false
|
||||
@@ -80,11 +90,54 @@ env:
|
||||
ZULIP_API_KEY: ${{ secrets.ZULIP_API_KEY }}
|
||||
MWMTEST: ${{ inputs.map-generator-test }}
|
||||
MWMCONTINUE: ${{ inputs.map-generator-continue }}
|
||||
# MWMCOUNTRIES: ${{ inputs.map-generator-countries }}
|
||||
#TODO: undo inputs.map-generator-countries
|
||||
MWMCOUNTRIES: US_Oregon_Portland
|
||||
DEBIAN_FRONTEND: noninteractive
|
||||
TZ: Etc/UTC
|
||||
|
||||
jobs:
|
||||
cleanup-old-files:
  # Housekeeping job: frees disk space on the external volume before a new
  # map build by dropping intermediate artefacts and all but the 6 most
  # recent timestamped build directories.
  # if: inputs.run-cleanup
  name: Clean Up Old Files
  runs-on: mapfilemaker
  container:
    image: codeberg.org/comaps/maps_generator:f6d53d54f794
    volumes:
      - /mnt/4tbexternal/:/mnt/4tbexternal/
      - /mnt/4tbexternal/osm-planet:/home/planet
  concurrency:
    group: ${{ github.workflow }}-map-generator-${{ github.event.pull_request.number || github.ref }}
    cancel-in-progress: true
  steps:
    - name: Remove intermediate data
      shell: bash
      run: |
        echo "Checking for intermediate map build data in /mnt/4tbexternal/osm-maps..."
        cd /mnt/4tbexternal/osm-maps/

        # NOTE(review): intermediate data is presumably what a continued build
        # (MWMCONTINUE) resumes from, so it is kept in that case - confirm.
        if [ "$MWMCONTINUE" = "true" ]; then
          echo "Continuing a previous build, keeping intermediate data."
          exit 0
        fi

        # Iterate with a glob instead of `ls | grep`: an empty match must not
        # fail the step when bash runs with `-e -o pipefail`.
        shopt -s nullglob
        build_dir_re='^[0-9]{4}_[0-9]{2}_[0-9]{2}__[0-9]{2}_[0-9]{2}_[0-9]{2}/$'
        for dir in */; do
          [[ "$dir" =~ $build_dir_re ]] || continue
          build="${dir%/}"
          if [ -d "$build/intermediate_data" ]; then
            echo "Removing $build/intermediate_data"
            rm -rf -- "$build/intermediate_data"
          fi
          if [ -d "$build/osm2ft" ]; then
            echo "Removing $build/osm2ft"
            rm -rf -- "$build/osm2ft"
          fi
          if [ -f "$build/world_roads.o5m" ]; then
            echo "Removing $build/world_roads.o5m"
            rm -f -- "$build/world_roads.o5m"
          fi
        done
        echo "Intermediate data cleaned up."
    - name: Remove old map builds (keep last 6)
      shell: bash
      run: |
        echo "Checking for old map builds in /mnt/4tbexternal/osm-maps..."
        cd /mnt/4tbexternal/osm-maps/

        # Collect timestamped build directories (YYYY_MM_DD__HH_MM_SS/).
        shopt -s nullglob
        build_dir_re='^[0-9]{4}_[0-9]{2}_[0-9]{2}__[0-9]{2}_[0-9]{2}_[0-9]{2}/$'
        builds=()
        for dir in */; do
          if [[ "$dir" =~ $build_dir_re ]]; then
            builds+=("$dir")
          fi
        done

        # Names sort chronologically; newest first, skip the first 6, delete the rest.
        if (( ${#builds[@]} > 6 )); then
          printf '%s\n' "${builds[@]}" | LC_ALL=C sort -r | tail -n +7 | while read -r dir; do
            echo "Removing old build: $dir"
            rm -rf -- "$dir"
          done
        fi
        echo "Old map builds cleaned up."
|
||||
clone-repos:
|
||||
name: Clone Git Repos
|
||||
runs-on: mapfilemaker
|
||||
@@ -209,6 +262,130 @@ jobs:
|
||||
--data-urlencode topic=codeberg-bot \
|
||||
--data-urlencode 'content=Isolines are done!'
|
||||
|
||||
update-panoramax:
  # Refreshes the global Panoramax geoparquet dump (at most weekly) and
  # converts it into per-country *.panoramax files for the map generator.
  if: inputs.run-panoramax
  name: Update Panoramax
  runs-on: mapfilemaker
  needs:
    - clone-repos
  container:
    image: codeberg.org/comaps/maps_generator:f6d53d54f794
    volumes:
      - /mnt/4tbexternal/:/mnt/4tbexternal/
      - /mnt/4tbexternal/osm-planet:/home/planet
  concurrency:
    group: ${{ github.workflow }}-map-generator-${{ github.event.pull_request.number || github.ref }}
    cancel-in-progress: true
  steps:
    - uses: actions/cache@v4
      with:
        path: "~"
        key: cache-${{ github.run_id }}-${{ github.run_attempt }}
    - name: Install Python dependencies
      shell: bash
      run: |
        pip install pyarrow duckdb shapely
    - name: Download Panoramax Geoparquet
      shell: bash
      run: |
        mkdir -p /home/planet/panoramax
        cd /home/planet/panoramax

        PARQUET_UPDATED=false

        # Download the global Panoramax geoparquet file (20GB)
        if [ ! -f panoramax.parquet ]; then
          echo "panoramax.parquet does not exist, will download"
          PARQUET_UPDATED=true
        else
          # Check if file is older than 7 days
          FILE_AGE_DAYS=$(( ($(date +%s) - $(stat -c %Y panoramax.parquet)) / 86400 ))
          echo "panoramax.parquet is $FILE_AGE_DAYS days old"

          if [ "$FILE_AGE_DAYS" -gt 7 ]; then
            echo "File is older than 7 days, will re-download"
            PARQUET_UPDATED=true
          else
            echo "File is recent (< 7 days), skipping download"
          fi
        fi

        if [ "$PARQUET_UPDATED" = "true" ]; then
          echo "Downloading Panoramax geoparquet..."
          # --fail: an HTTP error must abort here instead of saving the error
          # page and moving it over the last good parquet file.
          curl -L --fail -o panoramax.parquet.tmp https://api.panoramax.xyz/data/geoparquet/panoramax.parquet
          mv panoramax.parquet.tmp panoramax.parquet
        fi

        # Export to GitHub environment for next step
        echo "PARQUET_UPDATED=$PARQUET_UPDATED" >> "$GITHUB_ENV"
    - name: Process Panoramax to per-country files
      shell: bash
      run: |
        cd ~/comaps
        mkdir -p /home/planet/panoramax/countries

        SHOULD_PROCESS=false

        # Check if parquet was just updated in this workflow run
        if [ "$PARQUET_UPDATED" = "true" ]; then
          echo "Parquet file was just updated, will process"
          SHOULD_PROCESS=true
        # Check if country files don't exist
        elif [ ! "$(ls -A /home/planet/panoramax/countries/*.panoramax 2>/dev/null)" ]; then
          echo "No country files exist, will process"
          SHOULD_PROCESS=true
        # Check if planet file is newer than last processing marker
        elif [ -f /home/planet/planet/planet.o5m ] && [ -f /home/planet/panoramax/countries/.last_processed ]; then
          if [ /home/planet/planet/planet.o5m -nt /home/planet/panoramax/countries/.last_processed ]; then
            echo "Planet file is newer than last processing, will process"
            SHOULD_PROCESS=true
          else
            echo "Country files are up-to-date, skipping processing"
          fi
        elif [ -f /home/planet/planet/planet.o5m ]; then
          echo "No processing marker exists but planet file does, will process"
          SHOULD_PROCESS=true
        else
          echo "Country files are up-to-date, skipping processing"
        fi

        if [ "$SHOULD_PROCESS" = "true" ]; then
          echo "Processing panoramax data to per-country files..."
          python3 tools/python/maps_generator/panoramax_preprocessor.py \
            --input /home/planet/panoramax/panoramax.parquet \
            --output /home/planet/panoramax/countries \
            --borders-dir ~/comaps/data/borders

          # Mark when processing completed (persists in /home/planet for timestamp comparison)
          touch /home/planet/panoramax/countries/.last_processed
        fi

        # Export to GitHub environment for notification step
        echo "PANORAMAX_PROCESSED=$SHOULD_PROCESS" >> "$GITHUB_ENV"
    - name: Check panoramax files
      shell: bash
      run: |
        # Count with a glob rather than `ls | wc -l`: with `-e -o pipefail` a
        # failing `ls` (no matches) would abort before the diagnostic below.
        shopt -s nullglob
        PANO_FILES=(/home/planet/panoramax/countries/*.panoramax)
        NUMPANO=${#PANO_FILES[@]}
        echo "Found $NUMPANO panoramax country files"
        if [ "$NUMPANO" -lt 5 ]; then
          echo "ERROR: Did generation fail? Expected at least 5 country files"
          exit 1
        fi
    - name: Notify Zulip
      shell: bash
      run: |
        # Only notify if processing actually happened in this workflow run
        if [ "$PANORAMAX_PROCESSED" = "true" ]; then
          curl -X POST https://comaps.zulipchat.com/api/v1/messages \
            -u "$ZULIP_BOT_EMAIL:$ZULIP_API_KEY" \
            --data-urlencode type=stream \
            --data-urlencode 'to="DevOps"' \
            --data-urlencode topic=codeberg-bot \
            --data-urlencode 'content=Panoramax processing is done!'
        else
          echo "No processing occurred in this run, skipping notification"
        fi
|
||||
|
||||
update-tiger:
|
||||
if: inputs.run-tiger
|
||||
name: Update TIGER
|
||||
@@ -247,7 +424,7 @@ jobs:
|
||||
tar -xOzf /home/planet/tiger-nominatim-preprocessed-latest.csv.tar.gz | ~/omim-build-relwithdebinfo/address_parser_tool --output_path=/home/planet/tiger
|
||||
|
||||
update-planet-pbf:
|
||||
if: inputs.run-planet-pbf
|
||||
if: inputs.run-planet
|
||||
name: Update PBF Planet
|
||||
runs-on: mapfilemaker
|
||||
container:
|
||||
@@ -431,7 +608,7 @@ jobs:
|
||||
--data-urlencode 'content=Wiki update is done!'
|
||||
|
||||
update-planet-o5m:
|
||||
if: inputs.run-planet-o5m
|
||||
if: inputs.run-planet
|
||||
name: Update O5M Planet
|
||||
runs-on: mapfilemaker
|
||||
container:
|
||||
@@ -574,4 +751,3 @@ jobs:
|
||||
--data-urlencode 'to="DevOps"' \
|
||||
--data-urlencode topic=codeberg-bot \
|
||||
--data-urlencode 'content=Upload is done!'
|
||||
|
||||
|
||||
@@ -74,6 +74,11 @@ public class PlacePageButtonFactory
|
||||
titleId = R.string.avoid_ferry;
|
||||
yield R.drawable.ic_avoid_ferry;
|
||||
}
|
||||
case PANORAMAX ->
|
||||
{
|
||||
titleId = R.string.panoramax;
|
||||
yield R.drawable.ic_camera;
|
||||
}
|
||||
case MORE ->
|
||||
{
|
||||
titleId = R.string.placepage_more_button;
|
||||
|
||||
@@ -144,6 +144,7 @@ public final class PlacePageButtons extends Fragment implements Observer<List<Pl
|
||||
ROUTE_AVOID_TOLL,
|
||||
ROUTE_AVOID_FERRY,
|
||||
ROUTE_AVOID_UNPAVED,
|
||||
PANORAMAX,
|
||||
MORE
|
||||
}
|
||||
|
||||
|
||||
@@ -428,6 +428,7 @@ public class PlacePageController
|
||||
case ROUTE_AVOID_TOLL -> onAvoidTollBtnClicked();
|
||||
case ROUTE_AVOID_UNPAVED -> onAvoidUnpavedBtnClicked();
|
||||
case ROUTE_AVOID_FERRY -> onAvoidFerryBtnClicked();
|
||||
case PANORAMAX -> onPanoramaxBtnClicked();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -499,6 +500,19 @@ public class PlacePageController
|
||||
requireActivity().finish();
|
||||
}
|
||||
|
||||
private void onPanoramaxBtnClicked()
|
||||
{
|
||||
if (mMapObject == null)
|
||||
return;
|
||||
String url = Framework.nativeGetPanoramaxUrl();
|
||||
if (!TextUtils.isEmpty(url))
|
||||
{
|
||||
Intent intent = new Intent(Intent.ACTION_VIEW);
|
||||
intent.setData(android.net.Uri.parse(url));
|
||||
startActivity(intent);
|
||||
}
|
||||
}
|
||||
|
||||
private void onRouteFromBtnClicked()
|
||||
{
|
||||
if (mMapObject == null)
|
||||
@@ -637,6 +651,10 @@ public class PlacePageController
|
||||
buttons.add(mapObject.isBookmark() ? PlacePageButtons.ButtonType.BOOKMARK_DELETE
|
||||
: PlacePageButtons.ButtonType.BOOKMARK_SAVE);
|
||||
}
|
||||
|
||||
// Add Panoramax button if imagery is available
|
||||
if (Framework.nativeHasPanoramax())
|
||||
buttons.add(PlacePageButtons.ButtonType.PANORAMAX);
|
||||
}
|
||||
mViewModel.setCurrentButtons(buttons);
|
||||
}
|
||||
|
||||
@@ -1764,6 +1764,16 @@ JNIEXPORT jboolean JNICALL Java_app_organicmaps_sdk_Framework_nativeHasPlacePage
|
||||
return static_cast<jboolean>(frm()->HasPlacePageInfo());
|
||||
}
|
||||
|
||||
// Tells Java whether the current place page has Panoramax imagery attached.
// Returns false when there is no place page info at all.
// NOTE(review): assumes GetPlacePageInfo() must only be called while place page info
// exists - confirm against android::Framework.
JNIEXPORT jboolean JNICALL Java_app_organicmaps_sdk_Framework_nativeHasPanoramax(JNIEnv *, jclass)
{
  if (!frm()->HasPlacePageInfo())
    return JNI_FALSE;
  return static_cast<jboolean>(g_framework->GetPlacePageInfo().HasPanoramax());
}
|
||||
|
||||
// Returns the Panoramax viewer URL stored in the current place page info as a Java string.
// An empty string is returned when there is no place page info.
// NOTE(review): assumes GetPlacePageInfo() must only be called while place page info
// exists - confirm against android::Framework.
JNIEXPORT jstring JNICALL Java_app_organicmaps_sdk_Framework_nativeGetPanoramaxUrl(JNIEnv * env, jclass)
{
  if (!frm()->HasPlacePageInfo())
    return jni::ToJavaString(env, std::string());
  return jni::ToJavaString(env, g_framework->GetPlacePageInfo().GetPanoramaxUrl());
}
|
||||
|
||||
JNIEXPORT void JNICALL Java_app_organicmaps_sdk_Framework_nativeMemoryWarning(JNIEnv *, jclass)
|
||||
{
|
||||
return frm()->MemoryWarning();
|
||||
|
||||
@@ -349,6 +349,8 @@ public class Framework
|
||||
* @return true if c++ framework has initialized internal place page object, otherwise - false.
|
||||
*/
|
||||
public static native boolean nativeHasPlacePageInfo();
|
||||
public static native boolean nativeHasPanoramax();
|
||||
public static native String nativeGetPanoramaxUrl();
|
||||
|
||||
public static native void nativeMemoryWarning();
|
||||
public static native void nativeSaveRoute();
|
||||
|
||||
@@ -1758,3 +1758,4 @@ amenity|luggage_locker;1629;
|
||||
building|guardhouse;[building=guardhouse],[amenity=security_booth],[amenity=checkpoint];;;;1630;
|
||||
office|security;1631;
|
||||
shop|lighting;1632;
|
||||
panoramax|image;1633;
|
||||
|
||||
|
Can't render this file because it contains an unexpected character in line 7 and column 16.
|
@@ -147,6 +147,8 @@ set(SRC
|
||||
osm_o5m_source.hpp
|
||||
osm_source.cpp
|
||||
osm_xml_source.hpp
|
||||
panoramax_generator.cpp
|
||||
panoramax_generator.hpp
|
||||
place_processor.cpp
|
||||
place_processor.hpp
|
||||
platform_helpers.cpp
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include "generator/feature_builder.hpp"
|
||||
#include "generator/final_processor_utils.hpp"
|
||||
#include "generator/isolines_generator.hpp"
|
||||
#include "generator/panoramax_generator.hpp"
|
||||
#include "generator/mini_roundabout_transformer.hpp"
|
||||
#include "generator/node_mixer.hpp"
|
||||
#include "generator/osm2type.hpp"
|
||||
@@ -68,6 +69,10 @@ void CountryFinalProcessor::Process()
|
||||
if (!m_isolinesPath.empty())
|
||||
AddIsolines();
|
||||
|
||||
LOG(LINFO, ("Adding panoramax..."));
|
||||
if (!m_panoramaxPath.empty())
|
||||
AddPanoramax();
|
||||
|
||||
// DropProhibitedSpeedCameras();
|
||||
LOG(LINFO, ("Processing building parts..."));
|
||||
ProcessBuildingParts();
|
||||
@@ -293,6 +298,22 @@ void CountryFinalProcessor::AddAddresses()
|
||||
LOG(LINFO, ("Total addresses:", totalStats));
|
||||
}
|
||||
|
||||
void CountryFinalProcessor::AddPanoramax()
|
||||
{
|
||||
if (m_panoramaxPath.empty())
|
||||
return;
|
||||
|
||||
PanoramaxFeaturesGenerator panoramaxGenerator(m_panoramaxPath);
|
||||
ForEachMwmTmp(m_temporaryMwmPath, [&](auto const & name, auto const & path)
|
||||
{
|
||||
if (!IsCountry(name))
|
||||
return;
|
||||
|
||||
FeatureBuilderWriter<serialization_policy::MaxAccuracy> writer(path, FileWriter::Op::OP_APPEND);
|
||||
panoramaxGenerator.GeneratePanoramax(name, [&](auto const & fb) { writer.Write(fb); });
|
||||
}, m_threadsCount);
|
||||
}
|
||||
|
||||
void CountryFinalProcessor::ProcessCoastline()
|
||||
{
|
||||
/// @todo We can remove MinSize at all.
|
||||
|
||||
@@ -24,6 +24,7 @@ public:
|
||||
|
||||
void SetIsolinesDir(std::string const & dir) { m_isolinesPath = dir; }
|
||||
void SetAddressesDir(std::string const & dir) { m_addressPath = dir; }
|
||||
void SetPanoramaxDir(std::string const & dir) { m_panoramaxPath = dir; }
|
||||
|
||||
void SetCityBoundariesFiles(std::string const & collectorFile) { m_boundariesCollectorFile = collectorFile; }
|
||||
|
||||
@@ -39,6 +40,7 @@ private:
|
||||
void AddFakeNodes();
|
||||
void AddIsolines();
|
||||
void AddAddresses();
|
||||
void AddPanoramax();
|
||||
void DropProhibitedSpeedCameras();
|
||||
// void Finish();
|
||||
|
||||
@@ -47,7 +49,7 @@ private:
|
||||
std::string m_borderPath;
|
||||
std::string m_temporaryMwmPath;
|
||||
std::string m_intermediateDir;
|
||||
std::string m_isolinesPath, m_addressPath;
|
||||
std::string m_isolinesPath, m_addressPath, m_panoramaxPath;
|
||||
std::string m_boundariesCollectorFile;
|
||||
std::string m_coastlineGeomFilename;
|
||||
std::string m_worldCoastsFilename;
|
||||
|
||||
@@ -39,8 +39,8 @@ struct GenerateInfo
|
||||
|
||||
std::string m_cacheDir;
|
||||
|
||||
// External folders with additional preprocessed data (isolines, addresses).
|
||||
std::string m_isolinesDir, m_addressesDir;
|
||||
// External folders with additional preprocessed data (isolines, addresses, panoramax).
|
||||
std::string m_isolinesDir, m_addressesDir, m_panoramaxDir;
|
||||
|
||||
// Current generated file name if --output option is defined.
|
||||
std::string m_fileName;
|
||||
|
||||
@@ -107,6 +107,7 @@ DEFINE_string(nodes_list_path, "",
|
||||
DEFINE_bool(generate_isolines_info, false, "Generate the isolines info section");
|
||||
DEFINE_string(isolines_path, "", "Path to isolines directory. If set, adds isolines linear features.");
|
||||
DEFINE_string(addresses_path, "", "Path to addresses directory. If set, adds addr:interpolation features.");
|
||||
DEFINE_string(panoramax_path, "", "Path to panoramax directory. If set, adds panoramax imagery point features.");
|
||||
|
||||
// Routing.
|
||||
DEFINE_bool(make_routing_index, false, "Make sections with the routing information.");
|
||||
@@ -243,6 +244,7 @@ MAIN_WITH_ERROR_HANDLING([](int argc, char ** argv)
|
||||
genInfo.m_complexHierarchyFilename = FLAGS_complex_hierarchy_data;
|
||||
genInfo.m_isolinesDir = FLAGS_isolines_path;
|
||||
genInfo.m_addressesDir = FLAGS_addresses_path;
|
||||
genInfo.m_panoramaxDir = FLAGS_panoramax_path;
|
||||
|
||||
// Use merged style.
|
||||
GetStyleReader().SetCurrentStyle(MapStyleMerged);
|
||||
|
||||
141
generator/panoramax_generator.cpp
Normal file
141
generator/panoramax_generator.cpp
Normal file
@@ -0,0 +1,141 @@
|
||||
#include "generator/panoramax_generator.hpp"
|
||||
|
||||
#include "indexer/classificator.hpp"
|
||||
#include "indexer/feature_meta.hpp"
|
||||
|
||||
#include "coding/file_reader.hpp"
|
||||
#include "coding/read_write_utils.hpp"
|
||||
|
||||
#include "geometry/mercator.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/logging.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
|
||||
namespace generator
|
||||
{
|
||||
namespace
|
||||
{
|
||||
std::string_view const kPanoramax = "panoramax";
|
||||
std::string_view const kImage = "image";
|
||||
|
||||
// Builds "<panoramaxDir>/<countryName>.panoramax".
std::string GetPanoramaxFilePath(std::string const & countryName, std::string const & panoramaxDir)
{
  std::string path = panoramaxDir;
  path += "/";
  path += countryName;
  path += ".panoramax";
  return path;
}
|
||||
|
||||
struct PanoramaxPoint
|
||||
{
|
||||
double lat;
|
||||
double lon;
|
||||
std::string imageId;
|
||||
};
|
||||
|
||||
bool LoadPanoramaxPoints(std::string const & filePath, std::vector<PanoramaxPoint> & points)
|
||||
{
|
||||
try
|
||||
{
|
||||
std::ifstream file(filePath, std::ios::binary);
|
||||
if (!file.is_open())
|
||||
{
|
||||
LOG(LWARNING, ("Can't open panoramax file", filePath));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read header
|
||||
uint32_t version;
|
||||
uint64_t pointCount;
|
||||
|
||||
file.read(reinterpret_cast<char*>(&version), sizeof(version));
|
||||
file.read(reinterpret_cast<char*>(&pointCount), sizeof(pointCount));
|
||||
|
||||
if (version != 1)
|
||||
{
|
||||
LOG(LERROR, ("Unsupported panoramax file version", version));
|
||||
return false;
|
||||
}
|
||||
|
||||
points.reserve(static_cast<size_t>(pointCount));
|
||||
|
||||
// Read points
|
||||
for (uint64_t i = 0; i < pointCount; ++i)
|
||||
{
|
||||
PanoramaxPoint point;
|
||||
|
||||
file.read(reinterpret_cast<char*>(&point.lat), sizeof(point.lat));
|
||||
file.read(reinterpret_cast<char*>(&point.lon), sizeof(point.lon));
|
||||
|
||||
// Read image_id (length-prefixed string)
|
||||
uint32_t imageIdLength;
|
||||
file.read(reinterpret_cast<char*>(&imageIdLength), sizeof(imageIdLength));
|
||||
|
||||
if (imageIdLength > 0 && imageIdLength < 10000) // Sanity check
|
||||
{
|
||||
point.imageId.resize(imageIdLength);
|
||||
file.read(&point.imageId[0], imageIdLength);
|
||||
}
|
||||
|
||||
if (file.fail())
|
||||
{
|
||||
LOG(LERROR, ("Error reading panoramax point", i, "from", filePath));
|
||||
return false;
|
||||
}
|
||||
|
||||
points.push_back(std::move(point));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
catch (std::exception const & e)
|
||||
{
|
||||
LOG(LERROR, ("Exception loading panoramax file", filePath, ":", e.what()));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
PanoramaxFeaturesGenerator::PanoramaxFeaturesGenerator(std::string const & panoramaxDir)
|
||||
: m_panoramaxDir(panoramaxDir)
|
||||
{
|
||||
Classificator const & c = classif();
|
||||
m_panoramaxType = c.GetTypeByPath({kPanoramax, kImage});
|
||||
}
|
||||
|
||||
void PanoramaxFeaturesGenerator::GeneratePanoramax(std::string const & countryName,
|
||||
FeaturesCollectFn const & fn) const
|
||||
{
|
||||
auto const panoramaxPath = GetPanoramaxFilePath(countryName, m_panoramaxDir);
|
||||
|
||||
std::vector<PanoramaxPoint> points;
|
||||
if (!LoadPanoramaxPoints(panoramaxPath, points))
|
||||
{
|
||||
LOG(LWARNING, ("Can't load panoramax points for", countryName));
|
||||
return;
|
||||
}
|
||||
|
||||
LOG(LINFO, ("Generating", points.size(), "panoramax points for", countryName));
|
||||
|
||||
for (auto const & point : points)
|
||||
{
|
||||
feature::FeatureBuilder fb;
|
||||
|
||||
// Set point geometry
|
||||
m2::PointD const mercatorPoint = mercator::FromLatLon(point.lat, point.lon);
|
||||
fb.SetCenter(mercatorPoint);
|
||||
|
||||
// Add classificator type
|
||||
fb.AddType(m_panoramaxType);
|
||||
|
||||
// Add metadata with image ID
|
||||
if (!point.imageId.empty())
|
||||
{
|
||||
fb.GetMetadata().Set(feature::Metadata::FMD_PANORAMAX, point.imageId);
|
||||
}
|
||||
|
||||
fn(std::move(fb));
|
||||
}
|
||||
}
|
||||
} // namespace generator
|
||||
24
generator/panoramax_generator.hpp
Normal file
24
generator/panoramax_generator.hpp
Normal file
@@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
|
||||
#include "generator/feature_builder.hpp"
|
||||
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
namespace generator
|
||||
{
|
||||
// Generates Panoramax imagery point features from binary files.
|
||||
// Binary files are created by the panoramax_preprocessor.py script.
|
||||
class PanoramaxFeaturesGenerator
|
||||
{
|
||||
public:
|
||||
explicit PanoramaxFeaturesGenerator(std::string const & panoramaxDir);
|
||||
|
||||
using FeaturesCollectFn = std::function<void(feature::FeatureBuilder && fb)>;
|
||||
void GeneratePanoramax(std::string const & countryName, FeaturesCollectFn const & fn) const;
|
||||
|
||||
private:
|
||||
std::string m_panoramaxDir;
|
||||
uint32_t m_panoramaxType; // Classificator type for panoramax|image
|
||||
};
|
||||
} // namespace generator
|
||||
@@ -182,6 +182,7 @@ RawGenerator::FinalProcessorPtr RawGenerator::CreateCountryFinalProcessor(Affili
|
||||
auto finalProcessor = std::make_shared<CountryFinalProcessor>(affiliations, m_genInfo.m_tmpDir, m_threadsCount);
|
||||
finalProcessor->SetIsolinesDir(m_genInfo.m_isolinesDir);
|
||||
finalProcessor->SetAddressesDir(m_genInfo.m_addressesDir);
|
||||
finalProcessor->SetPanoramaxDir(m_genInfo.m_panoramaxDir);
|
||||
finalProcessor->SetMiniRoundabouts(m_genInfo.GetIntermediateFileName(MINI_ROUNDABOUTS_FILENAME));
|
||||
finalProcessor->SetAddrInterpolation(m_genInfo.GetIntermediateFileName(ADDR_INTERPOL_FILENAME));
|
||||
if (addAds)
|
||||
|
||||
@@ -706,6 +706,7 @@ void Framework::FillInfoFromFeatureType(FeatureType & ft, place_page::Info & inf
|
||||
info.SetFromFeatureType(ft);
|
||||
|
||||
FillDescription(ft, info);
|
||||
CheckPanoramaxImagery(info);
|
||||
|
||||
auto const mwmInfo = ft.GetID().m_mwmId.GetInfo();
|
||||
bool const isMapVersionEditable = CanEditMapForPosition(info.GetMercator());
|
||||
@@ -3263,6 +3264,43 @@ void Framework::FillDescription(FeatureType & ft, place_page::Info & info) const
|
||||
}
|
||||
}
|
||||
|
||||
// Looks for a panoramax|image feature with a non-empty image id around the place page
// position and, if one is found, stores its id and viewer URL in |info|.
// |info| is left untouched when nothing is found.
void Framework::CheckPanoramaxImagery(place_page::Info & info) const
{
  // Size passed to the rect helper, in meters.
  // NOTE(review): presumably yields a ~50m search radius - confirm the helper's semantics.
  auto constexpr kSearchSizeM = 50.0;
  auto const searchRect = mercator::RectByCenterXYAndSizeInMeters(info.GetMercator(), kSearchSizeM);
  auto const imageType = classif().GetTypeByPath({"panoramax", "image"});

  std::string foundImageId;
  m_featuresFetcher.GetDataSource().ForEachInRect([&](FeatureType & ft)
  {
    if (!ft.GetTypes().Has(imageType))
      return base::ControlFlow::Continue;

    auto const imageId = ft.GetMetadata(feature::Metadata::FMD_PANORAMAX);
    if (imageId.empty())
      return base::ControlFlow::Continue;

    // A usable image: remember it and ask to stop the search.
    foundImageId = std::string(imageId);
    return base::ControlFlow::Break;
  }, searchRect, df::GetDrawTileScale(searchRect));

  if (foundImageId.empty())
    return;

  info.m_hasPanoramax = true;
  info.m_panoramaxUrl = "https://panoramax.openstreetmap.fr/#focus=pic:" + foundImageId;
  info.m_panoramaxImageId = std::move(foundImageId);
}
|
||||
|
||||
void Framework::OnPowerFacilityChanged(power_management::Facility const facility, bool enabled)
|
||||
{
|
||||
if (facility == power_management::Facility::PerspectiveView || facility == power_management::Facility::Buildings3d)
|
||||
|
||||
@@ -640,6 +640,7 @@ private:
|
||||
void FillTrackInfo(Track const & track, m2::PointD const & trackPoint, place_page::Info & info) const;
|
||||
void SetPlacePageLocation(place_page::Info & info);
|
||||
void FillDescription(FeatureType & ft, place_page::Info & info) const;
|
||||
void CheckPanoramaxImagery(place_page::Info & info) const;
|
||||
|
||||
public:
|
||||
search::ReverseGeocoder::Address GetAddressAtPoint(m2::PointD const & pt) const;
|
||||
|
||||
@@ -114,6 +114,9 @@ public:
|
||||
bool HasApiUrl() const { return !m_apiUrl.empty(); }
|
||||
/// TODO: Support all possible Internet types in UI. @See MapObject::GetInternet().
|
||||
bool HasWifi() const { return GetInternet() == feature::Internet::Wlan; }
|
||||
/// @returns true if Panoramax imagery is available within 50m.
|
||||
bool HasPanoramax() const { return m_hasPanoramax; }
|
||||
std::string const & GetPanoramaxUrl() const { return m_panoramaxUrl; }
|
||||
/// Should be used by UI code to generate cool name for new bookmarks.
|
||||
// TODO: Tune new bookmark name. May be add address or some other data.
|
||||
kml::LocalizableString FormatNewBookmarkName() const;
|
||||
@@ -258,6 +261,11 @@ private:
|
||||
/// Formatted feature address for inner using.
|
||||
std::string m_address;
|
||||
|
||||
/// Panoramax
|
||||
bool m_hasPanoramax = false;
|
||||
std::string m_panoramaxImageId;
|
||||
std::string m_panoramaxUrl;
|
||||
|
||||
/// Routing
|
||||
RouteMarkType m_routeMarkType;
|
||||
size_t m_intermediateIndex = 0;
|
||||
|
||||
@@ -351,6 +351,10 @@ class PathProvider:
|
||||
def addresses_path() -> AnyStr:
|
||||
return settings.ADDRESSES_PATH
|
||||
|
||||
@staticmethod
|
||||
def panoramax_path() -> AnyStr:
|
||||
return settings.PANORAMAX_PATH
|
||||
|
||||
@staticmethod
|
||||
def borders_path() -> AnyStr:
|
||||
return os.path.join(settings.USER_RESOURCE_PATH, "borders")
|
||||
|
||||
@@ -60,6 +60,7 @@ class GenTool:
|
||||
"intermediate_data_path": str,
|
||||
"isolines_path": str,
|
||||
"addresses_path": str,
|
||||
"panoramax_path": str,
|
||||
"nodes_list_path": str,
|
||||
"node_storage": str,
|
||||
"osm_file_name": str,
|
||||
|
||||
@@ -121,6 +121,7 @@ US_POSTCODES_URL = ""
|
||||
SRTM_PATH = ""
|
||||
ISOLINES_PATH = ""
|
||||
ADDRESSES_PATH = ""
|
||||
PANORAMAX_PATH = ""
|
||||
|
||||
# Stats section:
|
||||
STATS_TYPES_CONFIG = os.path.join(ETC_DIR, "stats_types_config.txt")
|
||||
@@ -278,6 +279,7 @@ def init(default_settings_path: AnyStr):
|
||||
global SRTM_PATH
|
||||
global ISOLINES_PATH
|
||||
global ADDRESSES_PATH
|
||||
global PANORAMAX_PATH
|
||||
|
||||
PLANET_URL = cfg.get_opt_path("External", "PLANET_URL", PLANET_URL)
|
||||
PLANET_MD5_URL = cfg.get_opt_path("External", "PLANET_MD5_URL", md5_ext(PLANET_URL))
|
||||
@@ -306,6 +308,7 @@ def init(default_settings_path: AnyStr):
|
||||
SRTM_PATH = cfg.get_opt_path("External", "SRTM_PATH", SRTM_PATH)
|
||||
ISOLINES_PATH = cfg.get_opt_path("External", "ISOLINES_PATH", ISOLINES_PATH)
|
||||
ADDRESSES_PATH = cfg.get_opt_path("External", "ADDRESSES_PATH", ADDRESSES_PATH)
|
||||
PANORAMAX_PATH = cfg.get_opt_path("External", "PANORAMAX_PATH", PANORAMAX_PATH)
|
||||
|
||||
# Stats section:
|
||||
global STATS_TYPES_CONFIG
|
||||
|
||||
@@ -134,6 +134,8 @@ class StageFeatures(Stage):
|
||||
if is_accepted(env, StageIsolinesInfo):
|
||||
extra.update({"isolines_path": PathProvider.isolines_path()})
|
||||
extra.update({"addresses_path": PathProvider.addresses_path()})
|
||||
if PathProvider.panoramax_path():
|
||||
extra.update({"panoramax_path": PathProvider.panoramax_path()})
|
||||
|
||||
steps.step_features(env, **extra)
|
||||
if os.path.exists(env.paths.packed_polygons_path):
|
||||
|
||||
403
tools/python/maps_generator/panoramax_preprocessor.py
Normal file
403
tools/python/maps_generator/panoramax_preprocessor.py
Normal file
@@ -0,0 +1,403 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Panoramax Preprocessor
|
||||
|
||||
Converts the global Panoramax geoparquet file into per-country binary files
|
||||
for use in the map generator.
|
||||
|
||||
The script streams the large geoparquet file (20GB+) using DuckDB to avoid
|
||||
loading everything into memory, performs a spatial join with country polygons,
|
||||
and writes compact binary files for each country.
|
||||
|
||||
Binary Format:
|
||||
Header:
|
||||
uint32 version (=1)
|
||||
uint64 point_count
|
||||
Data (repeated point_count times):
|
||||
double lat (8 bytes)
|
||||
double lon (8 bytes)
|
||||
string image_id (length-prefixed: uint32 length + bytes)
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import struct
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
from collections import defaultdict
|
||||
|
||||
try:
|
||||
import duckdb
|
||||
except ImportError:
|
||||
print("Error: duckdb is required. Install with: pip install duckdb", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
from shapely.geometry import Point, Polygon, MultiPolygon
|
||||
from shapely.strtree import STRtree
|
||||
except ImportError:
|
||||
print("Error: shapely is required. Install with: pip install shapely", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _polygons_from_ring(ring, source_name):
    """Convert one .poly section's vertices into a list of valid Polygons.

    Returns an empty list when the ring is too short, cannot be repaired,
    or repairs into something that is not polygonal.
    """
    if not ring:
        return []

    # Close the ring if the file did not repeat the first vertex.
    if ring[0] != ring[-1]:
        ring = ring + [ring[0]]

    # A polygon needs at least 3 vertices plus the closing one.
    if len(ring) < 4:
        return []

    try:
        poly = Polygon(ring)

        # buffer(0) is the usual trick for repairing self-intersections.
        if not poly.is_valid:
            poly = poly.buffer(0)

        if not poly.is_valid or poly.is_empty:
            logger.debug(f"Skipping invalid section in {source_name}")
            return []

        if poly.geom_type == 'Polygon':
            return [poly]
        if poly.geom_type == 'MultiPolygon':
            # The repair may split a ring into several polygons.
            return list(poly.geoms)

        logger.debug(f"Skipping non-polygonal geometry ({poly.geom_type}) in {source_name}")
    except Exception as e:
        # Best effort: one broken section must not discard the whole region.
        logger.debug(f"Error creating polygon in {source_name}: {e}")

    return []


def parse_poly_file(poly_path: Path) -> Optional[MultiPolygon]:
    """
    Parse an Osmosis .poly file and return a Shapely MultiPolygon.

    .poly format:
        Line 1: Region name
        Section N: (numbered 1, 2, 3...)
            lon lat (pairs of coordinates)
            ...
        END

    Only sections whose header line is an integer are read. Sections with
    any other header (for example '!'-prefixed hole sections) are ignored,
    so holes are not subtracted from the region.

    Args:
        poly_path: Path to the .poly file.

    Returns:
        A MultiPolygon with every valid section, or None when the file
        contains no usable polygon.
    """
    polygons = []
    ring = []
    in_section = False

    with open(poly_path, 'r', encoding='utf-8') as f:
        next(f, None)  # Skip first line (region name)

        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue

            if line.upper() == 'END':
                # Ends the current section; the file-level END arrives with
                # an empty ring and is a no-op.
                polygons.extend(_polygons_from_ring(ring, poly_path.name))
                ring = []
                in_section = False
                continue

            # A line consisting of a single integer is a section header.
            try:
                int(line)
            except ValueError:
                pass
            else:
                in_section = True
                continue

            if not in_section:
                continue

            parts = line.split()
            if len(parts) < 2:
                continue

            try:
                ring.append((float(parts[0]), float(parts[1])))  # (lon, lat)
            except ValueError:
                logger.debug(f"Ignoring malformed coordinate line in {poly_path.name}: {line!r}")

    if not polygons:
        logger.warning(f"No valid polygons found in {poly_path.name}")
        return None

    return MultiPolygon(polygons)
|
||||
|
||||
|
||||
def load_country_polygons(borders_dir: Path) -> Dict[str, MultiPolygon]:
    """
    Load all .poly files from the borders directory.

    Files are processed in sorted order so the resulting dict (and anything
    built from it) does not depend on filesystem enumeration order.

    Args:
        borders_dir: Directory that directly contains the *.poly files.

    Returns:
        A dict mapping region name (file name without the .poly extension)
        to its MultiPolygon. Files that fail to parse or contain no valid
        polygon are left out.
    """
    logger.info(f"Loading .poly files from {borders_dir}")

    poly_files = sorted(borders_dir.glob("*.poly"))
    logger.info(f"Found {len(poly_files)} .poly files")

    polygons = {}

    for poly_file in poly_files:
        region_name = poly_file.stem  # Filename without .poly extension

        try:
            multi_polygon = parse_poly_file(poly_file)
        except Exception as e:
            # Best effort: a single unreadable file must not stop the load.
            logger.error(f"Error parsing {poly_file.name}: {e}")
            continue

        # parse_poly_file signals "nothing usable" with None.
        if multi_polygon is not None:
            polygons[region_name] = multi_polygon

    logger.info(f"Successfully loaded {len(polygons)} region polygons")
    return polygons
|
||||
|
||||
|
||||
class RegionFinder:
    """
    Spatial index for finding which region a point belongs to.

    Uses Shapely's STRtree to narrow down candidate regions before running
    the exact containment test.
    """

    def __init__(self, regions: Dict[str, MultiPolygon]):
        """Build the index from a mapping of region name to geometry."""
        logger.info("Building spatial index for region lookup...")

        self.regions = regions
        # Parallel lists: region_names[i] is the name of geometries[i].
        self.region_names = list(regions.keys())
        self.geometries = list(regions.values())

        # Lets a geometry returned by STRtree.query() be mapped back to its
        # position in the lists above.
        self._index_by_geometry_id = {
            id(geometry): position for position, geometry in enumerate(self.geometries)
        }

        # Build R-tree spatial index for fast lookups
        self.tree = STRtree(self.geometries)

        logger.info(f"Spatial index built with {len(self.geometries)} regions")

    def find_region(self, lat: float, lon: float) -> Optional[str]:
        """
        Find which region a coordinate belongs to.

        Args:
            lat: Latitude of the point.
            lon: Longitude of the point.

        Returns:
            The name of the first candidate region that contains the point,
            or None if no region contains it.
        """
        point = Point(lon, lat)  # Note: Shapely uses (x, y) = (lon, lat)

        # The tree only yields candidates; each one still needs the exact
        # containment test below.
        for candidate in self.tree.query(point):
            if hasattr(candidate, 'geom_type'):
                # Shapely < 2.0 returns geometry objects rather than indices.
                # NOTE(review): assumes these are the same objects that were
                # inserted into the tree - confirm on the Shapely version in use.
                idx = self._index_by_geometry_id.get(id(candidate))
                if idx is None:
                    continue
            else:
                # Shapely 2.x returns integer indices into self.geometries.
                idx = candidate

            if self.geometries[idx].contains(point):
                return self.region_names[idx]

        return None
|
||||
|
||||
|
||||
def write_binary_file(output_path: Path, points: List[Tuple[float, float, str]]):
    """
    Serialize panoramax points into a little-endian binary file.

    Layout:
        Header:
            uint32 version = 1
            uint64 point_count
        Then, for each point:
            double lat
            double lon
            uint32 image_id_length
            bytes  image_id (UTF-8)

    Missing parent directories of output_path are created, and an existing
    file at output_path is overwritten.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)

    total = len(points)

    with open(output_path, 'wb') as out:
        # Header: '<' selects little-endian, unpadded fields, so this is
        # exactly 4 bytes of version followed by 8 bytes of count.
        out.write(struct.pack('<IQ', 1, total))

        for lat, lon, image_id in points:
            encoded_id = image_id.encode('utf-8')
            # Fixed-size part of the record, then the length-prefixed id.
            record_head = struct.pack('<ddI', lat, lon, len(encoded_id))
            out.write(record_head + encoded_id)

    logger.info(f"Wrote {total} points to {output_path}")
|
||||
|
||||
|
||||
def _sql_string_literal(value: str) -> str:
    """Return value as a single-quoted SQL string literal with embedded quotes doubled."""
    return "'" + value.replace("'", "''") + "'"


def _load_duckdb_extension(conn, name: str) -> None:
    """Install and load a DuckDB extension; a failure is logged, not raised."""
    try:
        conn.execute(f"INSTALL {name};")
        conn.execute(f"LOAD {name};")
    except Exception as e:
        logger.warning(f"Could not load {name} extension: {e}")


def process_parquet_streaming(parquet_url: str, output_dir: Path, borders_dir: Path, batch_size: int = 100000):
    """
    Read the Panoramax parquet file in batches and write per-country binary files.

    Rows are fetched from DuckDB batch_size at a time and assigned to a
    region using the .poly files from borders_dir. Points that fall in no
    region are dropped.

    NOTE: every matched point is kept in memory until the whole input has
    been read; output files are only written at the end.
    TODO: Implement append mode or accumulate all then write once.

    Args:
        parquet_url: Path or URL of the geoparquet file.
        output_dir: Directory that receives one <region>.panoramax per region.
        borders_dir: Directory containing the .poly border files.
        batch_size: Number of rows fetched per batch; must be at least 1.

    Raises:
        ValueError: If batch_size is smaller than 1.
        Exception: Any error raised while querying or writing is logged and
            re-raised.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    # Only log progress for regions that have grown beyond this many points.
    report_threshold = 100000

    # Load region polygons and build spatial index
    regions = load_country_polygons(borders_dir)
    if not regions:
        logger.error("No regions loaded - cannot process panoramax data")
        return

    region_finder = RegionFinder(regions)

    # Escaped once so a quote in the path/URL cannot break the SQL below.
    source_literal = _sql_string_literal(str(parquet_url))

    conn = duckdb.connect(database=':memory:')
    try:
        # httpfs: remote file access. spatial: provides ST_X / ST_Y used below.
        _load_duckdb_extension(conn, "httpfs")
        _load_duckdb_extension(conn, "spatial")

        logger.info(f"Reading parquet file: {parquet_url}")

        # Log the column names up front; purely informational.
        try:
            schema_result = conn.execute(
                f"DESCRIBE SELECT * FROM read_parquet({source_literal}) LIMIT 0"
            ).fetchall()
            logger.info(f"Parquet schema: {[col[0] for col in schema_result]}")
        except Exception as e:
            logger.warning(f"Could not read schema: {e}")

        # Points accumulated per region, in input order.
        country_points: Dict[str, List[Tuple[float, float, str]]] = defaultdict(list)

        query = f"""
            SELECT
                ST_Y(geometry) as lat,
                ST_X(geometry) as lon,
                id as image_id
            FROM read_parquet({source_literal})
            WHERE geometry IS NOT NULL
        """

        try:
            result = conn.execute(query)

            batch_count = 0
            total_points = 0
            skipped_rows = 0

            while True:
                batch = result.fetchmany(batch_size)
                if not batch:
                    break

                batch_count += 1
                batch_size_actual = len(batch)
                total_points += batch_size_actual

                logger.info(f"Processing batch {batch_count}: {batch_size_actual} points (total: {total_points})")

                for lat, lon, image_id in batch:
                    # Without both coordinates no point can be built.
                    if lat is None or lon is None:
                        skipped_rows += 1
                        continue

                    # Find which region this point belongs to
                    region = region_finder.find_region(lat, lon)

                    # Only add points that fall within a defined region
                    if region:
                        country_points[region].append((lat, lon, str(image_id)))

                # Progress report only: nothing is written to disk here.
                if batch_count % 10 == 0:
                    for country, points in country_points.items():
                        if len(points) > report_threshold:
                            logger.info(f"Country {country} has {len(points)} points accumulated")

            logger.info(f"Finished processing {total_points} total points")
            if skipped_rows:
                logger.warning(f"Skipped {skipped_rows} rows without usable coordinates")
            logger.info(f"Countries found: {list(country_points.keys())}")

            # Write final output files
            for country, points in country_points.items():
                if points:
                    write_binary_file(output_dir / f"{country}.panoramax", points)

        except Exception as e:
            logger.error(f"Error processing parquet: {e}")
            raise
    finally:
        conn.close()
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: parse arguments, validate them, run the conversion."""
    parser = argparse.ArgumentParser(
        description="Convert Panoramax geoparquet to per-country binary files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )

    parser.add_argument(
        '--input',
        default='https://api.panoramax.xyz/data/geoparquet/panoramax.parquet',
        help='Path or URL to Panoramax geoparquet file (default: official Panoramax URL)'
    )

    parser.add_argument(
        '--output',
        type=Path,
        required=True,
        help='Output directory for per-country .panoramax files'
    )

    parser.add_argument(
        '--borders-dir',
        type=Path,
        default=Path(__file__).parent.parent.parent.parent / 'data' / 'borders',
        help='Path to directory containing .poly border files (default: <repo>/data/borders)'
    )

    parser.add_argument(
        '--batch-size',
        type=int,
        default=100000,
        help='Number of rows to process per batch (default: 100000)'
    )

    args = parser.parse_args()

    # A non-positive batch size would fetch nothing and still report success.
    if args.batch_size < 1:
        parser.error("--batch-size must be at least 1")

    logger.info("Panoramax Preprocessor starting")
    logger.info(f"Input: {args.input}")
    logger.info(f"Output directory: {args.output}")
    logger.info(f"Borders directory: {args.borders_dir}")
    logger.info(f"Batch size: {args.batch_size}")

    # Verify borders directory exists (a regular file at that path is rejected too)
    if not args.borders_dir.is_dir():
        logger.error(f"Borders directory not found: {args.borders_dir}")
        sys.exit(1)

    # Create output directory
    args.output.mkdir(parents=True, exist_ok=True)

    # Process the parquet file
    process_parquet_streaming(args.input, args.output, args.borders_dir, args.batch_size)

    logger.info("Panoramax preprocessing complete!")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -80,6 +80,7 @@ SUBWAY_URL: file:///home/planet/subway/subways.transit.json
|
||||
SRTM_PATH: /home/planet/SRTM-patched-europe/
|
||||
ISOLINES_PATH: /home/planet/isolines/
|
||||
ADDRESSES_PATH: /home/planet/tiger/
|
||||
PANORAMAX_PATH: /home/planet/panoramax/countries/
|
||||
|
||||
# Local path (not url!) to .csv files.
|
||||
UK_POSTCODES_URL: /home/planet/postcodes/gb-postcode-data/gb_postcodes.csv
|
||||
|
||||
@@ -13,6 +13,7 @@ mkdir -p /home/planet/postcodes/gb-postcode-data/
|
||||
mkdir -p /home/planet/postcodes/us-postcodes/
|
||||
mkdir -p /home/planet/SRTM-patched-europe/
|
||||
mkdir -p /home/planet/subway
|
||||
mkdir -p /home/planet/panoramax/countries/
|
||||
|
||||
echo "<$(date +%T)> Running ./configure.sh ..."
|
||||
cd ~/comaps
|
||||
|
||||
Reference in New Issue
Block a user