Compare commits

...

2 Commits

Author SHA1 Message Date
zyphlar
7722cc7d46 add missing files, add map build cleanup, temporarily build only one region
Signed-off-by: zyphlar <zyphlar@gmail.com>
2026-01-03 22:17:05 -08:00
zyphlar
5eeeaeb288 initial attempt at panoramax layer
Signed-off-by: zyphlar <zyphlar@gmail.com>
2026-01-03 16:44:29 -08:00
23 changed files with 666 additions and 5 deletions

View File

@@ -17,6 +17,17 @@ on:
required: false
default: false
type: boolean
# TODO: enable
# run-panoramax:
# description: 'Update Panoramax imagery?'
# required: false
# default: true
# type: boolean
run-cleanup:
description: 'Clean up old build files?'
required: false
default: false
type: boolean
run-tiger:
description: 'Update TIGER address data?'
required: false
@@ -80,11 +91,49 @@ env:
ZULIP_API_KEY: ${{ secrets.ZULIP_API_KEY }}
MWMTEST: ${{ inputs.map-generator-test }}
MWMCONTINUE: ${{ inputs.map-generator-continue }}
# MWMCOUNTRIES: ${{ inputs.map-generator-countries }}
#TODO: undo ${{ inputs.map-generator-countries }}
MWMCOUNTRIES: US_Oregon_Portland
DEBIAN_FRONTEND: noninteractive
TZ: Etc/UTC
jobs:
cleanup-old-files:
  # Optional housekeeping job; only runs when the run-cleanup input is set.
  if: inputs.run-cleanup
  name: Clean Up Old Files
  runs-on: mapfilemaker
  container:
    image: codeberg.org/comaps/maps_generator:f6d53d54f794
    volumes:
      - /mnt/4tbexternal/:/mnt/4tbexternal/
      - /mnt/4tbexternal/osm-planet:/home/planet
  concurrency:
    group: ${{ github.workflow }}-map-generator-${{ github.event.pull_request.number || github.ref }}
    cancel-in-progress: true
  steps:
    - name: Remove intermediate data
      shell: bash
      run: |
        set -euo pipefail
        echo "Checking for intermediate map build data in /mnt/4tbexternal/osm-maps..."
        cd /mnt/4tbexternal/osm-maps/
        # Expand to nothing (not the literal pattern) when no build directory exists,
        # so an empty volume is a no-op instead of a failed pipeline.
        shopt -s nullglob
        # Build directories are named YYYY_MM_DD__HH_MM_SS.
        for dir in [0-9][0-9][0-9][0-9]_[0-9][0-9]_[0-9][0-9]__[0-9][0-9]_[0-9][0-9]_[0-9][0-9]/; do
          echo "Removing any intermediate data: $dir"
          rm -rf -- "${dir}/intermediate_data" "${dir}/osm2ft" "${dir}/world_roads.o5m"
        done
        echo "Intermediate data cleaned up."
    - name: Remove old map builds (keep last 6)
      shell: bash
      run: |
        set -euo pipefail
        echo "Checking for old map builds in /mnt/4tbexternal/osm-maps..."
        cd /mnt/4tbexternal/osm-maps/
        shopt -s nullglob
        # Glob results are sorted by name; with timestamp names that is oldest first.
        builds=( [0-9][0-9][0-9][0-9]_[0-9][0-9]_[0-9][0-9]__[0-9][0-9]_[0-9][0-9]_[0-9][0-9]/ )
        keep=6
        excess=$(( ${#builds[@]} - keep ))
        if (( excess > 0 )); then
          # Everything before the newest $keep entries is deleted.
          for dir in "${builds[@]:0:excess}"; do
            echo "Removing old build: $dir"
            rm -rf -- "$dir"
          done
        fi
        echo "Old map builds cleaned up."
clone-repos:
name: Clone Git Repos
runs-on: mapfilemaker
@@ -209,6 +258,70 @@ jobs:
--data-urlencode topic=codeberg-bot \
--data-urlencode 'content=Isolines are done!'
update-panoramax:
  # TODO: uncomment
  # if: inputs.run-panoramax
  name: Update Panoramax
  runs-on: mapfilemaker
  needs:
    - clone-repos
  container:
    image: codeberg.org/comaps/maps_generator:f6d53d54f794
    volumes:
      - /mnt/4tbexternal/:/mnt/4tbexternal/
      - /mnt/4tbexternal/osm-planet:/home/planet
  concurrency:
    group: ${{ github.workflow }}-map-generator-${{ github.event.pull_request.number || github.ref }}
    cancel-in-progress: true
  steps:
    - uses: actions/cache@v4
      with:
        path: "~"
        key: cache-${{ github.run_id }}-${{ github.run_attempt }}
    - name: Install Python dependencies
      shell: bash
      run: |
        pip install --upgrade pip
        pip install pyarrow duckdb
    - name: Download Panoramax Geoparquet
      shell: bash
      run: |
        set -euo pipefail
        mkdir -p /home/planet/panoramax
        cd /home/planet/panoramax
        # Download the global Panoramax geoparquet file (20GB)
        if [ ! -f panoramax.parquet ]; then
          echo "Downloading Panoramax geoparquet..."
          # --fail: an HTTP error page must not be saved as the data file.
          # The transfer goes to a temporary name and is renamed only on success, so an
          # interrupted download is not mistaken for a complete file on the next run.
          curl --fail -L -o panoramax.parquet.part https://api.panoramax.xyz/data/geoparquet/panoramax.parquet
          mv panoramax.parquet.part panoramax.parquet
        else
          echo "panoramax.parquet already exists, skipping download"
        fi
    - name: Process Panoramax to per-country files
      shell: bash
      run: |
        cd ~/comaps
        mkdir -p /home/planet/panoramax/countries
        python3 tools/python/maps_generator/panoramax_preprocessor.py \
          --input /home/planet/panoramax/panoramax.parquet \
          --output /home/planet/panoramax/countries \
          --polygons ~/comaps/data/packed_polygons.bin
    - name: Check panoramax files
      shell: bash
      run: |
        # Count via a glob: `ls | wc -l` aborts under `-e -o pipefail` when nothing
        # matches, before the explanatory error below can be printed.
        shopt -s nullglob
        pano_files=( /home/planet/panoramax/countries/*.panoramax )
        NUMPANO=${#pano_files[@]}
        echo "Found $NUMPANO panoramax country files"
        if [ "$NUMPANO" -lt 5 ]; then
          echo "ERROR: Did generation fail? Expected at least 5 country files"
          exit 1
        fi
    - name: Notify Zulip
      run: |
        curl -X POST https://comaps.zulipchat.com/api/v1/messages \
          -u "$ZULIP_BOT_EMAIL:$ZULIP_API_KEY" \
          --data-urlencode type=stream \
          --data-urlencode 'to="DevOps"' \
          --data-urlencode topic=codeberg-bot \
          --data-urlencode 'content=Panoramax processing is done!'
update-tiger:
if: inputs.run-tiger
name: Update TIGER
@@ -574,4 +687,3 @@ jobs:
--data-urlencode 'to="DevOps"' \
--data-urlencode topic=codeberg-bot \
--data-urlencode 'content=Upload is done!'

View File

@@ -74,6 +74,11 @@ public class PlacePageButtonFactory
titleId = R.string.avoid_ferry;
yield R.drawable.ic_avoid_ferry;
}
case PANORAMAX ->
{
titleId = R.string.panoramax;
yield R.drawable.ic_camera;
}
case MORE ->
{
titleId = R.string.placepage_more_button;

View File

@@ -144,6 +144,7 @@ public final class PlacePageButtons extends Fragment implements Observer<List<Pl
ROUTE_AVOID_TOLL,
ROUTE_AVOID_FERRY,
ROUTE_AVOID_UNPAVED,
PANORAMAX,
MORE
}

View File

@@ -428,6 +428,7 @@ public class PlacePageController
case ROUTE_AVOID_TOLL -> onAvoidTollBtnClicked();
case ROUTE_AVOID_UNPAVED -> onAvoidUnpavedBtnClicked();
case ROUTE_AVOID_FERRY -> onAvoidFerryBtnClicked();
case PANORAMAX -> onPanoramaxBtnClicked();
}
}
@@ -499,6 +500,19 @@ public class PlacePageController
requireActivity().finish();
}
/**
 * Handles a click on the Panoramax place page button: asks the native framework for the
 * Panoramax URL and hands it to an external viewer through an ACTION_VIEW intent.
 * Does nothing when there is no map object or the URL is empty.
 */
private void onPanoramaxBtnClicked()
{
  if (mMapObject == null)
    return;

  final String url = Framework.nativeGetPanoramaxUrl();
  if (TextUtils.isEmpty(url))
    return;

  final Intent intent = new Intent(Intent.ACTION_VIEW);
  intent.setData(android.net.Uri.parse(url));
  try
  {
    startActivity(intent);
  }
  catch (android.content.ActivityNotFoundException e)
  {
    // No installed app can handle the URL (e.g. no browser); log it instead of crashing.
    android.util.Log.w("PlacePageController", "No activity found to open " + url, e);
  }
}
private void onRouteFromBtnClicked()
{
if (mMapObject == null)
@@ -637,6 +651,10 @@ public class PlacePageController
buttons.add(mapObject.isBookmark() ? PlacePageButtons.ButtonType.BOOKMARK_DELETE
: PlacePageButtons.ButtonType.BOOKMARK_SAVE);
}
// Add Panoramax button if imagery is available
if (Framework.nativeHasPanoramax())
buttons.add(PlacePageButtons.ButtonType.PANORAMAX);
}
mViewModel.setCurrentButtons(buttons);
}

View File

@@ -1764,6 +1764,16 @@ JNIEXPORT jboolean JNICALL Java_app_organicmaps_sdk_Framework_nativeHasPlacePage
return static_cast<jboolean>(frm()->HasPlacePageInfo());
}
// Returns whether the current place page info reports Panoramax imagery.
JNIEXPORT jboolean JNICALL Java_app_organicmaps_sdk_Framework_nativeHasPanoramax(JNIEnv *, jclass)
{
  // NOTE(review): GetPlacePageInfo() presumably requires an initialized place page object;
  // guard with the same check nativeHasPlacePageInfo() exposes so a stray call is harmless.
  if (!frm()->HasPlacePageInfo())
    return JNI_FALSE;
  return static_cast<jboolean>(g_framework->GetPlacePageInfo().HasPanoramax());
}
// Returns the Panoramax URL stored in the current place page info, or null when there is
// no place page info. The Java caller treats null and "" alike (TextUtils.isEmpty).
JNIEXPORT jstring JNICALL Java_app_organicmaps_sdk_Framework_nativeGetPanoramaxUrl(JNIEnv * env, jclass)
{
  // NOTE(review): GetPlacePageInfo() presumably requires an initialized place page object.
  if (!frm()->HasPlacePageInfo())
    return nullptr;
  return jni::ToJavaString(env, g_framework->GetPlacePageInfo().GetPanoramaxUrl());
}
JNIEXPORT void JNICALL Java_app_organicmaps_sdk_Framework_nativeMemoryWarning(JNIEnv *, jclass)
{
return frm()->MemoryWarning();

View File

@@ -349,6 +349,8 @@ public class Framework
* @return true if c++ framework has initialized internal place page object, otherwise - false.
*/
public static native boolean nativeHasPlacePageInfo();
public static native boolean nativeHasPanoramax();
public static native String nativeGetPanoramaxUrl();
public static native void nativeMemoryWarning();
public static native void nativeSaveRoute();

View File

@@ -1758,3 +1758,4 @@ amenity|luggage_locker;1629;
building|guardhouse;[building=guardhouse],[amenity=security_booth],[amenity=checkpoint];;;;1630;
office|security;1631;
shop|lighting;1632;
panoramax|image;1633;
Can't render this file because it contains an unexpected character in line 7 and column 16.

View File

@@ -7,6 +7,7 @@
#include "generator/feature_builder.hpp"
#include "generator/final_processor_utils.hpp"
#include "generator/isolines_generator.hpp"
#include "generator/panoramax_generator.hpp"
#include "generator/mini_roundabout_transformer.hpp"
#include "generator/node_mixer.hpp"
#include "generator/osm2type.hpp"
@@ -68,6 +69,10 @@ void CountryFinalProcessor::Process()
if (!m_isolinesPath.empty())
AddIsolines();
LOG(LINFO, ("Adding panoramax..."));
if (!m_panoramaxPath.empty())
AddPanoramax();
// DropProhibitedSpeedCameras();
LOG(LINFO, ("Processing building parts..."));
ProcessBuildingParts();
@@ -293,6 +298,22 @@ void CountryFinalProcessor::AddAddresses()
LOG(LINFO, ("Total addresses:", totalStats));
}
void CountryFinalProcessor::AddPanoramax()
{
if (m_panoramaxPath.empty())
return;
PanoramaxFeaturesGenerator panoramaxGenerator(m_panoramaxPath);
ForEachMwmTmp(m_temporaryMwmPath, [&](auto const & name, auto const & path)
{
if (!IsCountry(name))
return;
FeatureBuilderWriter<serialization_policy::MaxAccuracy> writer(path, FileWriter::Op::OP_APPEND);
panoramaxGenerator.GeneratePanoramax(name, [&](auto const & fb) { writer.Write(fb); });
}, m_threadsCount);
}
void CountryFinalProcessor::ProcessCoastline()
{
/// @todo We can remove MinSize at all.

View File

@@ -24,6 +24,7 @@ public:
void SetIsolinesDir(std::string const & dir) { m_isolinesPath = dir; }
void SetAddressesDir(std::string const & dir) { m_addressPath = dir; }
void SetPanoramaxDir(std::string const & dir) { m_panoramaxPath = dir; }
void SetCityBoundariesFiles(std::string const & collectorFile) { m_boundariesCollectorFile = collectorFile; }
@@ -39,6 +40,7 @@ private:
void AddFakeNodes();
void AddIsolines();
void AddAddresses();
void AddPanoramax();
void DropProhibitedSpeedCameras();
// void Finish();
@@ -47,7 +49,7 @@ private:
std::string m_borderPath;
std::string m_temporaryMwmPath;
std::string m_intermediateDir;
std::string m_isolinesPath, m_addressPath;
std::string m_isolinesPath, m_addressPath, m_panoramaxPath;
std::string m_boundariesCollectorFile;
std::string m_coastlineGeomFilename;
std::string m_worldCoastsFilename;

View File

@@ -39,8 +39,8 @@ struct GenerateInfo
std::string m_cacheDir;
// External folders with additional preprocessed data (isolines, addresses).
std::string m_isolinesDir, m_addressesDir;
// External folders with additional preprocessed data (isolines, addresses, panoramax).
std::string m_isolinesDir, m_addressesDir, m_panoramaxDir;
// Current generated file name if --output option is defined.
std::string m_fileName;

View File

@@ -107,6 +107,7 @@ DEFINE_string(nodes_list_path, "",
DEFINE_bool(generate_isolines_info, false, "Generate the isolines info section");
DEFINE_string(isolines_path, "", "Path to isolines directory. If set, adds isolines linear features.");
DEFINE_string(addresses_path, "", "Path to addresses directory. If set, adds addr:interpolation features.");
DEFINE_string(panoramax_path, "", "Path to panoramax directory. If set, adds panoramax imagery point features.");
// Routing.
DEFINE_bool(make_routing_index, false, "Make sections with the routing information.");
@@ -243,6 +244,7 @@ MAIN_WITH_ERROR_HANDLING([](int argc, char ** argv)
genInfo.m_complexHierarchyFilename = FLAGS_complex_hierarchy_data;
genInfo.m_isolinesDir = FLAGS_isolines_path;
genInfo.m_addressesDir = FLAGS_addresses_path;
genInfo.m_panoramaxDir = FLAGS_panoramax_path;
// Use merged style.
GetStyleReader().SetCurrentStyle(MapStyleMerged);

View File

@@ -0,0 +1,144 @@
#include "generator/panoramax_generator.hpp"
#include "indexer/classificator.hpp"
#include "indexer/feature_meta.hpp"
#include "coding/file_reader.hpp"
#include "coding/read_write_utils.hpp"
#include "geometry/mercator.hpp"
#include "base/assert.hpp"
#include "base/logging.hpp"
#include "base/string_utils.hpp"
#include <cstdint>
#include <fstream>
namespace generator
{
namespace
{
std::string_view const kPanoramax = "panoramax";
std::string_view const kImage = "image";
// Builds the path "<panoramaxDir>/<countryName>.panoramax".
std::string GetPanoramaxFilePath(std::string const & countryName, std::string const & panoramaxDir)
{
  std::string path(panoramaxDir);
  path += "/";
  path += countryName;
  path += ".panoramax";
  return path;
}
// One record loaded from a per-country .panoramax file.
struct PanoramaxPoint
{
// Latitude, read from the file as a raw double.
double lat;
// Longitude, read from the file as a raw double.
double lon;
// Image identifier; stays empty when the stored length is 0 or fails the loader's sanity check.
std::string imageId;
};
// Loads every point stored in the binary file |filePath| and appends it to |points|.
//
// Layout, as read here: uint32 version, uint64 point count, then for each point
// double lat, double lon, uint32 image id length, image id bytes.
// All values are read as raw bytes in host byte order.
//
// Returns false (after logging) when the file can't be opened, is truncated, has a
// version other than 1, or contains an image id length that fails the sanity check.
// |points| may hold a partial result in that case.
bool LoadPanoramaxPoints(std::string const & filePath, std::vector<PanoramaxPoint> & points)
{
  // Image id lengths at or above this value are treated as corruption.
  uint32_t constexpr kMaxImageIdLength = 10000;
  // Cap for the up-front allocation; the vector still grows on demand beyond it.
  uint64_t constexpr kMaxReservedPoints = 1 << 20;

  try
  {
    std::ifstream file(filePath, std::ios::binary);
    if (!file.is_open())
    {
      LOG(LWARNING, ("Can't open panoramax file", filePath));
      return false;
    }

    // Read header. Zero-initialized so a short read never exposes indeterminate values.
    uint32_t version = 0;
    uint64_t pointCount = 0;
    file.read(reinterpret_cast<char *>(&version), sizeof(version));
    file.read(reinterpret_cast<char *>(&pointCount), sizeof(pointCount));
    if (file.fail())
    {
      LOG(LERROR, ("Can't read panoramax file header from", filePath));
      return false;
    }

    if (version != 1)
    {
      LOG(LERROR, ("Unsupported panoramax file version", version));
      return false;
    }

    // The header count comes from the file; don't let a corrupted value drive a huge allocation.
    points.reserve(static_cast<size_t>(pointCount < kMaxReservedPoints ? pointCount : kMaxReservedPoints));

    // Read points
    for (uint64_t i = 0; i < pointCount; ++i)
    {
      PanoramaxPoint point;
      file.read(reinterpret_cast<char *>(&point.lat), sizeof(point.lat));
      file.read(reinterpret_cast<char *>(&point.lon), sizeof(point.lon));

      // Read image_id (length-prefixed string)
      uint32_t imageIdLength = 0;
      file.read(reinterpret_cast<char *>(&imageIdLength), sizeof(imageIdLength));
      if (file.fail())
      {
        LOG(LERROR, ("Error reading panoramax point", i, "from", filePath));
        return false;
      }

      if (imageIdLength >= kMaxImageIdLength)
      {
        // Continuing without consuming the id bytes would misparse every following record.
        LOG(LERROR, ("Invalid panoramax image id length", imageIdLength, "at point", i, "in", filePath));
        return false;
      }

      if (imageIdLength > 0)
      {
        point.imageId.resize(imageIdLength);
        file.read(&point.imageId[0], imageIdLength);
        if (file.fail())
        {
          LOG(LERROR, ("Error reading panoramax point", i, "from", filePath));
          return false;
        }
      }

      points.push_back(std::move(point));
    }

    return true;
  }
  catch (std::exception const & e)
  {
    LOG(LERROR, ("Exception loading panoramax file", filePath, ":", e.what()));
    return false;
  }
}
} // namespace
// Stores the input directory and resolves the classificator type for panoramax|image once.
PanoramaxFeaturesGenerator::PanoramaxFeaturesGenerator(std::string const & panoramaxDir)
  : m_panoramaxDir(panoramaxDir)
{
  m_panoramaxType = classif().GetTypeByPath({kPanoramax, kImage});
}
void PanoramaxFeaturesGenerator::GeneratePanoramax(std::string const & countryName,
FeaturesCollectFn const & fn) const
{
auto const panoramaxPath = GetPanoramaxFilePath(countryName, m_panoramaxDir);
std::vector<PanoramaxPoint> points;
if (!LoadPanoramaxPoints(panoramaxPath, points))
{
LOG(LWARNING, ("Can't load panoramax points for", countryName));
return;
}
LOG(LINFO, ("Generating", points.size(), "panoramax points for", countryName));
for (auto const & point : points)
{
feature::FeatureBuilder fb;
// Set point geometry
m2::PointD const mercatorPoint = mercator::FromLatLon(point.lat, point.lon);
fb.SetCenter(mercatorPoint);
// Add classificator type
fb.AddType(m_panoramaxType);
// Add metadata with image ID
if (!point.imageId.empty())
{
fb.GetMetadata().Set(feature::Metadata::FMD_PANORAMAX, point.imageId);
}
// Panoramax points are POI features (point geometry)
fb.SetPoint();
fn(std::move(fb));
}
}
} // namespace generator

View File

@@ -0,0 +1,24 @@
#pragma once

#include "generator/feature_builder.hpp"

#include <cstdint>
#include <functional>
#include <string>

namespace generator
{
// Generates Panoramax imagery point features from binary files.
// Binary files are created by the panoramax_preprocessor.py script.
class PanoramaxFeaturesGenerator
{
public:
  // Callback receiving each generated feature.
  using FeaturesCollectFn = std::function<void(feature::FeatureBuilder && fb)>;

  // |panoramaxDir| is the directory that holds the per-country input files.
  explicit PanoramaxFeaturesGenerator(std::string const & panoramaxDir);

  // Reads the file that belongs to |countryName| and calls |fn| for every generated feature.
  void GeneratePanoramax(std::string const & countryName, FeaturesCollectFn const & fn) const;

private:
  std::string m_panoramaxDir;
  uint32_t m_panoramaxType;  // Classificator type for panoramax|image
};
}  // namespace generator

View File

@@ -182,6 +182,7 @@ RawGenerator::FinalProcessorPtr RawGenerator::CreateCountryFinalProcessor(Affili
auto finalProcessor = std::make_shared<CountryFinalProcessor>(affiliations, m_genInfo.m_tmpDir, m_threadsCount);
finalProcessor->SetIsolinesDir(m_genInfo.m_isolinesDir);
finalProcessor->SetAddressesDir(m_genInfo.m_addressesDir);
finalProcessor->SetPanoramaxDir(m_genInfo.m_panoramaxDir);
finalProcessor->SetMiniRoundabouts(m_genInfo.GetIntermediateFileName(MINI_ROUNDABOUTS_FILENAME));
finalProcessor->SetAddrInterpolation(m_genInfo.GetIntermediateFileName(ADDR_INTERPOL_FILENAME));
if (addAds)

View File

@@ -706,6 +706,7 @@ void Framework::FillInfoFromFeatureType(FeatureType & ft, place_page::Info & inf
info.SetFromFeatureType(ft);
FillDescription(ft, info);
CheckPanoramaxImagery(info);
auto const mwmInfo = ft.GetID().m_mwmId.GetInfo();
bool const isMapVersionEditable = CanEditMapForPosition(info.GetMercator());
@@ -3263,6 +3264,43 @@ void Framework::FillDescription(FeatureType & ft, place_page::Info & info) const
}
}
// Looks for a panoramax|image feature with a non-empty image id near the place page
// position and, when one is found, records its id and viewer URL in |info|.
// |info| is left untouched when nothing is found.
void Framework::CheckPanoramaxImagery(place_page::Info & info) const
{
  // Search area around the place page position (50 m passed as the rect size).
  auto constexpr radiusM = 50.0;
  auto const searchRect = mercator::RectByCenterXYAndSizeInMeters(info.GetMercator(), radiusM);
  auto const imageType = classif().GetTypeByPath({"panoramax", "image"});

  std::string foundImageId;
  m_featuresFetcher.GetDataSource().ForEachInRect([&](FeatureType & ft)
  {
    if (!ft.GetTypes().Has(imageType))
      return base::ControlFlow::Continue;

    auto const imageId = ft.GetMetadata(feature::Metadata::FMD_PANORAMAX);
    if (imageId.empty())
      return base::ControlFlow::Continue;

    foundImageId = std::string(imageId);
    return base::ControlFlow::Break;  // Found one, stop searching
  }, searchRect, df::GetDrawTileScale(searchRect));

  // An id is only ever stored when it is non-empty, so emptiness means "nothing found".
  if (foundImageId.empty())
    return;

  info.m_hasPanoramax = true;
  info.m_panoramaxUrl = "https://panoramax.openstreetmap.fr/#focus=pic:" + foundImageId;
  info.m_panoramaxImageId = std::move(foundImageId);
}
void Framework::OnPowerFacilityChanged(power_management::Facility const facility, bool enabled)
{
if (facility == power_management::Facility::PerspectiveView || facility == power_management::Facility::Buildings3d)

View File

@@ -640,6 +640,7 @@ private:
void FillTrackInfo(Track const & track, m2::PointD const & trackPoint, place_page::Info & info) const;
void SetPlacePageLocation(place_page::Info & info);
void FillDescription(FeatureType & ft, place_page::Info & info) const;
void CheckPanoramaxImagery(place_page::Info & info) const;
public:
search::ReverseGeocoder::Address GetAddressAtPoint(m2::PointD const & pt) const;

View File

@@ -114,6 +114,9 @@ public:
bool HasApiUrl() const { return !m_apiUrl.empty(); }
/// TODO: Support all possible Internet types in UI. @See MapObject::GetInternet().
bool HasWifi() const { return GetInternet() == feature::Internet::Wlan; }
/// @returns true if Panoramax imagery is available within 50m.
bool HasPanoramax() const { return m_hasPanoramax; }
std::string const & GetPanoramaxUrl() const { return m_panoramaxUrl; }
/// Should be used by UI code to generate cool name for new bookmarks.
// TODO: Tune new bookmark name. May be add address or some other data.
kml::LocalizableString FormatNewBookmarkName() const;
@@ -258,6 +261,11 @@ private:
/// Formatted feature address for inner using.
std::string m_address;
/// Panoramax
bool m_hasPanoramax = false;
std::string m_panoramaxImageId;
std::string m_panoramaxUrl;
/// Routing
RouteMarkType m_routeMarkType;
size_t m_intermediateIndex = 0;

View File

@@ -351,6 +351,10 @@ class PathProvider:
def addresses_path() -> AnyStr:
return settings.ADDRESSES_PATH
@staticmethod
def panoramax_path() -> AnyStr:
    """Return the Panoramax path taken from ``settings.PANORAMAX_PATH``."""
    return settings.PANORAMAX_PATH
@staticmethod
def borders_path() -> AnyStr:
return os.path.join(settings.USER_RESOURCE_PATH, "borders")

View File

@@ -121,6 +121,7 @@ US_POSTCODES_URL = ""
SRTM_PATH = ""
ISOLINES_PATH = ""
ADDRESSES_PATH = ""
PANORAMAX_PATH = ""
# Stats section:
STATS_TYPES_CONFIG = os.path.join(ETC_DIR, "stats_types_config.txt")
@@ -278,6 +279,7 @@ def init(default_settings_path: AnyStr):
global SRTM_PATH
global ISOLINES_PATH
global ADDRESSES_PATH
global PANORAMAX_PATH
PLANET_URL = cfg.get_opt_path("External", "PLANET_URL", PLANET_URL)
PLANET_MD5_URL = cfg.get_opt_path("External", "PLANET_MD5_URL", md5_ext(PLANET_URL))
@@ -306,6 +308,7 @@ def init(default_settings_path: AnyStr):
SRTM_PATH = cfg.get_opt_path("External", "SRTM_PATH", SRTM_PATH)
ISOLINES_PATH = cfg.get_opt_path("External", "ISOLINES_PATH", ISOLINES_PATH)
ADDRESSES_PATH = cfg.get_opt_path("External", "ADDRESSES_PATH", ADDRESSES_PATH)
PANORAMAX_PATH = cfg.get_opt_path("External", "PANORAMAX_PATH", PANORAMAX_PATH)
# Stats section:
global STATS_TYPES_CONFIG

View File

@@ -134,6 +134,8 @@ class StageFeatures(Stage):
if is_accepted(env, StageIsolinesInfo):
extra.update({"isolines_path": PathProvider.isolines_path()})
extra.update({"addresses_path": PathProvider.addresses_path()})
if PathProvider.panoramax_path():
extra.update({"panoramax_path": PathProvider.panoramax_path()})
steps.step_features(env, **extra)
if os.path.exists(env.paths.packed_polygons_path):

View File

@@ -0,0 +1,260 @@
#!/usr/bin/env python3
"""
Panoramax Preprocessor
Converts the global Panoramax geoparquet file into per-country binary files
for use in the map generator.
The script streams the large geoparquet file (20GB+) using DuckDB to avoid
loading everything into memory, performs a spatial join with country polygons,
and writes compact binary files for each country.
Binary Format:
Header:
uint32 version (=1)
uint64 point_count
Data (repeated point_count times):
double lat (8 bytes)
double lon (8 bytes)
string image_id (length-prefixed: uint32 length + bytes)
"""
import argparse
import logging
import struct
import sys
from pathlib import Path
from typing import Dict, List, Tuple
from collections import defaultdict
try:
import duckdb
except ImportError:
print("Error: duckdb is required. Install with: pip install duckdb", file=sys.stderr)
sys.exit(1)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
def load_country_polygons(polygons_file: Path) -> Dict[str, object]:
    """
    Load country polygons from packed_polygons.bin file.

    This is a placeholder: the binary format is not parsed yet, so the function
    only logs a warning and returns an empty mapping regardless of the input.

    Args:
        polygons_file: Path to packed_polygons.bin (currently unused).

    Returns:
        An empty dict.
    """
    # TODO: Implement actual polygon loading from packed_polygons.bin
    # For MVP, we can use a simplified approach or require pre-processed country boundaries
    logger.warning("Country polygon loading not yet implemented - using fallback method")
    return {}
def determine_country_from_coords(lat: float, lon: float, conn: duckdb.DuckDBPyConnection) -> str:
    """
    Map a coordinate to a country name using rough bounding boxes.

    Placeholder for a real spatial join against country polygons (TODO). The
    boxes are tested in order and the first match wins; ``conn`` is not used.

    NOTE(review): the France box fully contains the Monaco box and most of the
    Switzerland box, so with this order "Monaco" is never returned and
    "Switzerland" only for 10 <= lon < 11.

    Returns:
        The country name, or "Unknown" when no box matches.
    """
    # (name, (lat_min, lat_max), (lon_min, lon_max)); all bounds are exclusive.
    rough_boxes = (
        ("France", (40, 52), (-5, 10)),
        ("Switzerland", (45, 48), (5, 11)),
        ("Monaco", (43, 44), (7, 8)),
    )
    for name, (lat_min, lat_max), (lon_min, lon_max) in rough_boxes:
        if lat_min < lat < lat_max and lon_min < lon < lon_max:
            return name
    return "Unknown"
def write_binary_file(output_path: Path, points: List[Tuple[float, float, str]]):
    """
    Serialize panoramax points into ``output_path`` (parent directories are created).

    Layout, all little-endian without padding:
        uint32 version (= 1)
        uint64 point_count
        then for every point:
            double lat
            double lon
            uint32 image_id_length
            bytes  image_id (UTF-8)
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)

    total = len(points)
    with open(output_path, 'wb') as out:
        # Header: version 1 followed by the number of records.
        out.write(struct.pack('<IQ', 1, total))

        for latitude, longitude, picture_id in points:
            encoded_id = picture_id.encode('utf-8')
            # Coordinates and the id length share one packed record; the id bytes follow.
            out.write(struct.pack('<ddI', latitude, longitude, len(encoded_id)))
            out.write(encoded_id)

    logger.info(f"Wrote {total} points to {output_path}")
def process_parquet_streaming(parquet_url: str, output_dir: Path, batch_size: int = 100000):
    """
    Read the Panoramax parquet file in batches and write per-country binary files.

    Rows are fetched from DuckDB ``batch_size`` at a time, but the points kept for
    each country are accumulated in memory and written once at the end;
    incremental (append) writing is still a TODO.

    Args:
        parquet_url: Path or URL handed to DuckDB's ``read_parquet``.
        output_dir: Directory that receives one ``<country>.panoramax`` file per country.
        batch_size: Number of rows fetched per ``fetchmany`` call.

    Raises:
        Exception: any error raised while querying or writing is logged and re-raised.
    """
    conn = duckdb.connect(database=':memory:')

    # httpfs enables remote file access; spatial is for future country boundary support.
    # Failing to load either one is only a warning.
    for extension in ("httpfs", "spatial"):
        try:
            conn.execute(f"INSTALL {extension};")
            conn.execute(f"LOAD {extension};")
        except Exception as e:
            logger.warning(f"Could not load {extension} extension: {e}")

    logger.info(f"Reading parquet file: {parquet_url}")

    # Points accumulated per country
    country_points: Dict[str, List[Tuple[float, float, str]]] = defaultdict(list)

    # The path/URL is embedded in a SQL string literal, so single quotes must be doubled.
    quoted_source = parquet_url.replace("'", "''")

    # Assuming parquet has columns: latitude, longitude, id (or similar)
    # Adjust column names based on actual Panoramax parquet schema
    query = f"""
        SELECT
            latitude as lat,
            longitude as lon,
            id as image_id
        FROM read_parquet('{quoted_source}')
        WHERE latitude IS NOT NULL AND longitude IS NOT NULL
    """

    try:
        result = conn.execute(query)

        batch_count = 0
        total_points = 0

        while True:
            batch = result.fetchmany(batch_size)
            if not batch:
                break

            batch_count += 1
            batch_size_actual = len(batch)
            total_points += batch_size_actual

            logger.info(f"Processing batch {batch_count}: {batch_size_actual} points (total: {total_points})")

            for lat, lon, image_id in batch:
                country = determine_country_from_coords(lat, lon, conn)

                # Skip unknown countries for now (or save to separate file)
                if country != "Unknown":
                    country_points[country].append((lat, lon, str(image_id)))

            # Progress report for large countries every 10 batches. Nothing is flushed here.
            # TODO: Implement append mode or accumulate all then write once
            if batch_count % 10 == 0:
                for country, points in country_points.items():
                    if len(points) > 100000:
                        logger.info(f"Country {country} has {len(points)} points accumulated")

        logger.info(f"Finished processing {total_points} total points")
        logger.info(f"Countries found: {list(country_points.keys())}")

        # Write final output files
        for country, points in country_points.items():
            if points:
                write_binary_file(output_dir / f"{country}.panoramax", points)

    except Exception as e:
        logger.error(f"Error processing parquet: {e}")
        raise

    finally:
        conn.close()
def main():
    """Command-line entry point: parse the arguments, create the output directory and run the conversion."""
    cli = argparse.ArgumentParser(
        description="Convert Panoramax geoparquet to per-country binary files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    cli.add_argument(
        '--input',
        default='https://api.panoramax.xyz/data/geoparquet/panoramax.parquet',
        help='Path or URL to Panoramax geoparquet file (default: official Panoramax URL)'
    )
    cli.add_argument(
        '--output',
        type=Path,
        required=True,
        help='Output directory for per-country .panoramax files'
    )
    cli.add_argument(
        '--polygons',
        type=Path,
        help='Path to packed_polygons.bin file (optional, for accurate country detection)'
    )
    cli.add_argument(
        '--batch-size',
        type=int,
        default=100000,
        help='Number of rows to process per batch (default: 100000)'
    )
    options = cli.parse_args()

    for line in (
        "Panoramax Preprocessor starting",
        f"Input: {options.input}",
        f"Output directory: {options.output}",
        f"Batch size: {options.batch_size}",
    ):
        logger.info(line)

    if not options.polygons:
        logger.warning("No country polygons provided - using simplified country detection")
    else:
        # The polygons file is only reported for now.
        # TODO: Load and use country polygons for accurate spatial join
        logger.info(f"Country polygons: {options.polygons}")

    # Make sure the output directory exists before any file is written.
    options.output.mkdir(parents=True, exist_ok=True)

    process_parquet_streaming(options.input, options.output, options.batch_size)

    logger.info("Panoramax preprocessing complete!")
if __name__ == '__main__':
main()

View File

@@ -80,6 +80,7 @@ SUBWAY_URL: file:///home/planet/subway/subways.transit.json
SRTM_PATH: /home/planet/SRTM-patched-europe/
ISOLINES_PATH: /home/planet/isolines/
ADDRESSES_PATH: /home/planet/tiger/
PANORAMAX_PATH: /home/planet/panoramax/countries/
# Local path (not url!) to .csv files.
UK_POSTCODES_URL: /home/planet/postcodes/gb-postcode-data/gb_postcodes.csv

View File

@@ -13,6 +13,7 @@ mkdir -p /home/planet/postcodes/gb-postcode-data/
mkdir -p /home/planet/postcodes/us-postcodes/
mkdir -p /home/planet/SRTM-patched-europe/
mkdir -p /home/planet/subway
mkdir -p /home/planet/panoramax/countries/
echo "<$(date +%T)> Running ./configure.sh ..."
cd ~/comaps