From 66a71869d2a20409be5a7c99675f3296b7ee258b Mon Sep 17 00:00:00 2001 From: Yannik Bloscheck Date: Wed, 21 Jan 2026 22:17:50 +0100 Subject: [PATCH] [generator] Fixed ordering for many subtypes Signed-off-by: Yannik Bloscheck --- generator/osm2type.cpp | 32 ++++++++++++++------- libs/indexer/feature_data.cpp | 34 +++++++++++++--------- libs/indexer/ftypes_subtypes.cpp | 1 - libs/indexer/ftypes_subtypes.hpp | 49 ++++++++++++++++++++++++++++---- 4 files changed, 86 insertions(+), 30 deletions(-) diff --git a/generator/osm2type.cpp b/generator/osm2type.cpp index a5008505f..f97f2ad29 100644 --- a/generator/osm2type.cpp +++ b/generator/osm2type.cpp @@ -362,9 +362,20 @@ void LeaveLongestTypes(std::vector & matchedTypes) { // Prevents types, that either have subtypes or are subtypes, from being removed auto subtypes = ftypes::Subtypes::Instance(); - auto const hasSubtypeRelatedTypes = [subtypes](auto const & lhs, auto const & rhs) + auto const areSubtypeRelatedTypes = [subtypes](auto const & lhs, auto const & rhs) { - return subtypes.IsPathOfTypeWithSubtypesOrSubtype(lhs) || subtypes.IsPathOfTypeWithSubtypesOrSubtype(rhs); + return subtypes.IsTypeWithSubtypesOrSubtype(lhs) && subtypes.IsTypeWithSubtypesOrSubtype(rhs); + }; + auto const isBetterBecauseOfSubtypeRelation = [subtypes](auto const & lhs, auto const & rhs) -> std::optional + { + bool const lhsIsTypeWithSubtypesOrSubtype = subtypes.IsTypeWithSubtypesOrSubtype(lhs); + bool const rhsIsTypeWithSubtypesOrSubtype = subtypes.IsTypeWithSubtypesOrSubtype(rhs); + if (lhsIsTypeWithSubtypesOrSubtype && !rhsIsTypeWithSubtypesOrSubtype) + return true; + else if (!lhsIsTypeWithSubtypesOrSubtype && rhsIsTypeWithSubtypesOrSubtype) + return false; + + return subtypes.ComparisonResultBasedOnTypeRelation(lhs, rhs); }; auto const equalPrefix = [](auto const & lhs, auto const & rhs) @@ -373,8 +384,12 @@ void LeaveLongestTypes(std::vector & matchedTypes) return equal(lhs.begin(), lhs.begin() + std::min(size_t(2), prefixSz), 
rhs.begin()); }; - auto const isBetter = [&equalPrefix](auto const & lhs, auto const & rhs) + auto const isBetter = [&equalPrefix, &isBetterBecauseOfSubtypeRelation](auto const & lhs, auto const & rhs) { + std::optional const isBetterBecauseOfSubtypeRelationResult = isBetterBecauseOfSubtypeRelation(lhs, rhs); + if (isBetterBecauseOfSubtypeRelationResult.has_value()) + return isBetterBecauseOfSubtypeRelationResult.value(); + if (equalPrefix(lhs, rhs)) { // Longest type is better. @@ -385,17 +400,14 @@ void LeaveLongestTypes(std::vector & matchedTypes) return lhs < rhs; }; - // `true` means it will be deleted, because being equal means it isn't unique - auto const isEqual = [&equalPrefix, &hasSubtypeRelatedTypes](auto const & lhs, auto const & rhs) + // `true` means the second one will be removed, because being equal means it isn't unique and the first one is more important + auto const isEqual = [&equalPrefix, &areSubtypeRelatedTypes](auto const & lhs, auto const & rhs) { - if (hasSubtypeRelatedTypes(lhs, rhs)) - return false; - if (equalPrefix(lhs, rhs)) { - // Keep longest type only, so return equal is true. + // Keep longest type only if (lhs.size() != rhs.size()) - return true; + return !areSubtypeRelatedTypes(lhs, rhs); return lhs == rhs; } diff --git a/libs/indexer/feature_data.cpp b/libs/indexer/feature_data.cpp index d58aedacd..09557561a 100644 --- a/libs/indexer/feature_data.cpp +++ b/libs/indexer/feature_data.cpp @@ -92,17 +92,20 @@ public: /// @return Type score, less is better. uint8_t Score(uint32_t t) const { + if (IsIn(0, t)) + return 1; + ftype::TruncValue(t, 2); + if (IsIn(3, t)) + return 4; + + ftype::TruncValue(t, 1); if (IsIn(2, t)) return 3; - ftype::TruncValue(t, 1); if (IsIn(1, t)) return 2; - if (IsIn(0, t)) - return 1; - return 0; } @@ -118,7 +121,7 @@ private: { // Fill types that will be taken into account last, // when we have many types for POI. 
- base::StringIL const types1[] = { + base::StringIL const types2[] = { // 1-arity {"building:part"}, {"hwtag"}, {"psurface"}, {"internet_access"}, {"organic"}, {"wheelchair"}, {"cuisine"}, {"area:highway"}, {"fee"}, @@ -126,15 +129,18 @@ private: Classificator const & c = classif(); - m_types[0].push_back(c.GetTypeByPath({"building"})); + for (auto const subtype : ftypes::Subtypes::Instance().AllSubtypes()) + m_types[0].push_back(subtype); - m_types[1].reserve(std::size(types1)); - for (auto const & type : types1) - m_types[1].push_back(c.GetTypeByPath(type)); + m_types[1].push_back(c.GetTypeByPath({"building"})); + + m_types[2].reserve(std::size(types2)); + for (auto const & type : types2) + m_types[2].push_back(c.GetTypeByPath(type)); // Put _most_ useless types here, that are not fit in the arity logic above. // This change is for generator, to eliminate "lit" type first when max types count exceeded. - m_types[2].push_back(c.GetTypeByPath({"hwtag", "lit"})); + m_types[3].push_back(c.GetTypeByPath({"hwtag", "lit"})); for (auto & v : m_types) std::sort(v.begin(), v.end()); @@ -142,7 +148,7 @@ private: bool IsIn(uint8_t idx, uint32_t t) const { return std::binary_search(m_types[idx].begin(), m_types[idx].end(), t); } - vector m_types[3]; + vector m_types[4]; }; } // namespace @@ -196,9 +202,9 @@ void TypesHolder::SortBySpec() std::stable_sort(begin(), end(), [&checker, &getPriority, &subtypes](uint32_t t1, uint32_t t2) { - std::optional const comaprisonResultBasedOnTypeRelation = subtypes.ComaprisonResultBasedOnTypeRelation(t1, t2); - if (comaprisonResultBasedOnTypeRelation.has_value()) - return comaprisonResultBasedOnTypeRelation.value(); + std::optional const comparisonResultBasedOnTypeRelation = subtypes.ComparisonResultBasedOnTypeRelation(t1, t2); + if (comparisonResultBasedOnTypeRelation.has_value()) + return comparisonResultBasedOnTypeRelation.value(); int const p1 = getPriority(t1); int const p2 = getPriority(t2); diff --git 
a/libs/indexer/ftypes_subtypes.cpp b/libs/indexer/ftypes_subtypes.cpp index d60677757..002f9f6c5 100644 --- a/libs/indexer/ftypes_subtypes.cpp +++ b/libs/indexer/ftypes_subtypes.cpp @@ -3,7 +3,6 @@ #include "base/assert.hpp" #include "coding/csv_reader.hpp" #include "coding/reader_streambuf.hpp" -#include "indexer/classificator.hpp" #include "platform/platform.hpp" #include "defines.hpp" diff --git a/libs/indexer/ftypes_subtypes.hpp b/libs/indexer/ftypes_subtypes.hpp index 57a54071b..c488a358e 100644 --- a/libs/indexer/ftypes_subtypes.hpp +++ b/libs/indexer/ftypes_subtypes.hpp @@ -1,5 +1,7 @@ #pragma once +#include "indexer/classificator.hpp" + #include #include #include @@ -18,6 +20,24 @@ public: /// Static instance static Subtypes const & Instance(); + /** + * Lists all types with subtypes + * @return All types with subtypes + */ + unordered_set AllTypesWithSubtypes() const + { + return m_types; + } + + /** + * Lists all subtypes + * @return All subtypes + */ + unordered_set AllSubtypes() const + { + return m_subtypes; + } + /** * Checks if the given type is a type with subtypes or a subtype * @param type The type to check @@ -69,15 +89,17 @@ public: * @param secondType The type to compare * @return `true` if the first type is a subtype but the second one isn't, `false` if it is the other way around */ - optional ComaprisonResultBasedOnTypeRelation(uint32_t const firstType, uint32_t const secondType) const + optional ComparisonResultBasedOnTypeRelation(uint32_t const firstType, uint32_t const secondType) const { + bool const firstTypeIsTypeWithSubtypes = IsTypeWithSubtypes(firstType); bool const firstTypeIsSubtype = IsSubtype(firstType); + bool const secondTypeIsTypeWithSubtypes = IsTypeWithSubtypes(secondType); bool const secondTypeIsSubtype = IsSubtype(secondType); - if (!firstTypeIsSubtype && !secondTypeIsSubtype) + if ((!firstTypeIsTypeWithSubtypes && !firstTypeIsSubtype) || (!secondTypeIsTypeWithSubtypes && !secondTypeIsSubtype) || 
(firstTypeIsTypeWithSubtypes && secondTypeIsTypeWithSubtypes)) return {}; - else if (firstTypeIsSubtype && !secondTypeIsSubtype) + else if (firstTypeIsSubtype && secondTypeIsTypeWithSubtypes) return false; - else if (!firstTypeIsSubtype && secondTypeIsSubtype) + else if (firstTypeIsTypeWithSubtypes && secondTypeIsSubtype) return true; // If they got to here, both are subtypes. So use the order of the subtypes for the comparison. @@ -98,12 +120,29 @@ public: * @param typePath The type path to check * @return `true` if it is a type with subtypes or a subtype, otherwise `false` */ - bool IsPathOfTypeWithSubtypesOrSubtype(vector const typePath) const + bool IsTypeWithSubtypesOrSubtype(vector const typePath) const { return ranges::find(m_typesAndSubtypesPaths.begin(), m_typesAndSubtypesPaths.end(), typePath) != m_typesAndSubtypesPaths.end(); } + /** + * Compares two given type paths based on their type relation + * @param firstTypePath The first type path to compare + * @param secondTypePath The second type path to compare + * @return `true` if the first type is a type with subtypes and the second one is a subtype, `false` if it is the other way around, an empty optional if either path cannot be resolved to a valid type or the type relation does not determine an order + */ + optional ComparisonResultBasedOnTypeRelation(vector const firstTypePath, vector const secondTypePath) const + { + auto const & classificator = classif(); + uint32_t const firstType = classificator.GetTypeByPathSafe(vector(firstTypePath.begin(), firstTypePath.end())); + uint32_t const secondType = classificator.GetTypeByPathSafe(vector(secondTypePath.begin(), secondTypePath.end())); + if (firstType != IndexAndTypeMapping::INVALID_TYPE && secondType != IndexAndTypeMapping::INVALID_TYPE) + return ComparisonResultBasedOnTypeRelation(firstType, secondType); + + return {}; + } + private: /// Constructor Subtypes();