Compare commits

..

1 Commits

Author SHA1 Message Date
Yannik Bloscheck
66a71869d2 [generator] Fixed ordering for many subtypes
Signed-off-by: Yannik Bloscheck <git@yannikbloscheck.com>
2026-01-22 00:15:05 +01:00
4 changed files with 86 additions and 30 deletions

View File

@@ -362,9 +362,20 @@ void LeaveLongestTypes(std::vector<generator::TypeStrings> & matchedTypes)
{
// Prevents types, that either have subtypes or are subtypes, from being removed
auto subtypes = ftypes::Subtypes::Instance();
auto const hasSubtypeRelatedTypes = [subtypes](auto const & lhs, auto const & rhs)
auto const areSubtypeRelatedTypes = [subtypes](auto const & lhs, auto const & rhs)
{
return subtypes.IsPathOfTypeWithSubtypesOrSubtype(lhs) || subtypes.IsPathOfTypeWithSubtypesOrSubtype(rhs);
return subtypes.IsTypeWithSubtypesOrSubtype(lhs) && subtypes.IsTypeWithSubtypesOrSubtype(rhs);
};
// Decides the order of two type paths from their subtype relation alone.
// Returns `true` if only `lhs` is a type with subtypes or a subtype, `false` if only `rhs` is one,
// and otherwise whatever Subtypes::ComparisonResultBasedOnTypeRelation yields for the two paths
// (which may be an empty optional, meaning "no decision").
// `subtypes` is captured by reference to avoid copying the whole Subtypes object into the closure;
// NOTE(review): this assumes the lambda never outlives the enclosing function - confirm.
auto const isBetterBecauseOfSubtypeRelation = [&subtypes](auto const & lhs, auto const & rhs) -> std::optional<bool>
{
  bool const lhsIsRelated = subtypes.IsTypeWithSubtypesOrSubtype(lhs);
  bool const rhsIsRelated = subtypes.IsTypeWithSubtypesOrSubtype(rhs);
  // Exactly one side is subtype related: that side is the better one.
  if (lhsIsRelated != rhsIsRelated)
    return lhsIsRelated;
  return subtypes.ComparisonResultBasedOnTypeRelation(lhs, rhs);
};
auto const equalPrefix = [](auto const & lhs, auto const & rhs)
@@ -373,8 +384,12 @@ void LeaveLongestTypes(std::vector<generator::TypeStrings> & matchedTypes)
return equal(lhs.begin(), lhs.begin() + std::min(size_t(2), prefixSz), rhs.begin());
};
auto const isBetter = [&equalPrefix](auto const & lhs, auto const & rhs)
auto const isBetter = [&equalPrefix, &isBetterBecauseOfSubtypeRelation](auto const & lhs, auto const & rhs)
{
std::optional<bool> const isBetterBecauseOfSubtypeRelationResult = isBetterBecauseOfSubtypeRelation(lhs, rhs);
if (isBetterBecauseOfSubtypeRelationResult.has_value())
return isBetterBecauseOfSubtypeRelationResult.value();
if (equalPrefix(lhs, rhs))
{
// Longest type is better.
@@ -385,17 +400,14 @@ void LeaveLongestTypes(std::vector<generator::TypeStrings> & matchedTypes)
return lhs < rhs;
};
// `true` means it will be deleted, because being equal means it isn't unique
auto const isEqual = [&equalPrefix, &hasSubtypeRelatedTypes](auto const & lhs, auto const & rhs)
// `true` means the second one will be removed, because being equal means it isn't unique and the first one is more important
auto const isEqual = [&equalPrefix, &areSubtypeRelatedTypes](auto const & lhs, auto const & rhs)
{
if (hasSubtypeRelatedTypes(lhs, rhs))
return false;
if (equalPrefix(lhs, rhs))
{
// Keep longest type only, so return equal is true.
// Keep longest type only
if (lhs.size() != rhs.size())
return true;
return !areSubtypeRelatedTypes(lhs, rhs);
return lhs == rhs;
}

View File

@@ -92,17 +92,20 @@ public:
/// @return Type score, less is better.
uint8_t Score(uint32_t t) const
{
if (IsIn(0, t))
return 1;
ftype::TruncValue(t, 2);
if (IsIn(3, t))
return 4;
ftype::TruncValue(t, 1);
if (IsIn(2, t))
return 3;
ftype::TruncValue(t, 1);
if (IsIn(1, t))
return 2;
if (IsIn(0, t))
return 1;
return 0;
}
@@ -118,7 +121,7 @@ private:
{
// Fill types that will be taken into account last,
// when we have many types for POI.
base::StringIL const types1[] = {
base::StringIL const types2[] = {
// 1-arity
{"building:part"}, {"hwtag"}, {"psurface"}, {"internet_access"}, {"organic"},
{"wheelchair"}, {"cuisine"}, {"area:highway"}, {"fee"},
@@ -126,15 +129,18 @@ private:
Classificator const & c = classif();
m_types[0].push_back(c.GetTypeByPath({"building"}));
for (auto const subtype : ftypes::Subtypes::Instance().AllSubtypes())
m_types[0].push_back(subtype);
m_types[1].reserve(std::size(types1));
for (auto const & type : types1)
m_types[1].push_back(c.GetTypeByPath(type));
m_types[1].push_back(c.GetTypeByPath({"building"}));
m_types[2].reserve(std::size(types2));
for (auto const & type : types2)
m_types[2].push_back(c.GetTypeByPath(type));
// Put the _most_ useless types here, i.e. those that do not fit the arity logic above.
// This change is for generator, to eliminate "lit" type first when max types count exceeded.
m_types[2].push_back(c.GetTypeByPath({"hwtag", "lit"}));
m_types[3].push_back(c.GetTypeByPath({"hwtag", "lit"}));
for (auto & v : m_types)
std::sort(v.begin(), v.end());
@@ -142,7 +148,7 @@ private:
bool IsIn(uint8_t idx, uint32_t t) const { return std::binary_search(m_types[idx].begin(), m_types[idx].end(), t); }
vector<uint32_t> m_types[3];
vector<uint32_t> m_types[4];
};
} // namespace
@@ -196,9 +202,9 @@ void TypesHolder::SortBySpec()
std::stable_sort(begin(), end(), [&checker, &getPriority, &subtypes](uint32_t t1, uint32_t t2)
{
std::optional<bool> const comaprisonResultBasedOnTypeRelation = subtypes.ComaprisonResultBasedOnTypeRelation(t1, t2);
if (comaprisonResultBasedOnTypeRelation.has_value())
return comaprisonResultBasedOnTypeRelation.value();
std::optional<bool> const comparisonResultBasedOnTypeRelation = subtypes.ComparisonResultBasedOnTypeRelation(t1, t2);
if (comparisonResultBasedOnTypeRelation.has_value())
return comparisonResultBasedOnTypeRelation.value();
int const p1 = getPriority(t1);
int const p2 = getPriority(t2);

View File

@@ -3,7 +3,6 @@
#include "base/assert.hpp"
#include "coding/csv_reader.hpp"
#include "coding/reader_streambuf.hpp"
#include "indexer/classificator.hpp"
#include "platform/platform.hpp"
#include "defines.hpp"

View File

@@ -1,5 +1,7 @@
#pragma once
#include "indexer/classificator.hpp"
#include <cstdint>
#include <map>
#include <optional>
@@ -18,6 +20,24 @@ public:
/// Static instance
static Subtypes const & Instance();
/**
 * Lists all types with subtypes
 * @return Read-only reference to the set of all types with subtypes. It refers to a member of this object,
 * so no copy of the set is made and the reference is only valid as long as this object is alive.
 */
unordered_set<uint32_t> const & AllTypesWithSubtypes() const
{
  return m_types;
}
/**
 * Lists all subtypes
 * @return Read-only reference to the set of all subtypes. It refers to a member of this object,
 * so no copy of the set is made and the reference is only valid as long as this object is alive.
 */
unordered_set<uint32_t> const & AllSubtypes() const
{
  return m_subtypes;
}
/**
* Checks if the given type is a type with subtypes or a subtype
* @param type The type to check
@@ -69,15 +89,17 @@ public:
* @param secondType The second type to compare
* @return `true` if the first type is a subtype but the second one isn't, `false` if it is the other way around
*/
optional<bool> ComaprisonResultBasedOnTypeRelation(uint32_t const firstType, uint32_t const secondType) const
optional<bool> ComparisonResultBasedOnTypeRelation(uint32_t const firstType, uint32_t const secondType) const
{
bool const firstTypeIsTypeWithSubtypes = IsTypeWithSubtypes(firstType);
bool const firstTypeIsSubtype = IsSubtype(firstType);
bool const secondTypeIsTypeWithSubtypes = IsTypeWithSubtypes(secondType);
bool const secondTypeIsSubtype = IsSubtype(secondType);
if (!firstTypeIsSubtype && !secondTypeIsSubtype)
if ((!firstTypeIsTypeWithSubtypes && !firstTypeIsSubtype) || (!secondTypeIsTypeWithSubtypes && !secondTypeIsSubtype) || (firstTypeIsTypeWithSubtypes && secondTypeIsTypeWithSubtypes))
return {};
else if (firstTypeIsSubtype && !secondTypeIsSubtype)
else if (firstTypeIsSubtype && secondTypeIsTypeWithSubtypes)
return false;
else if (!firstTypeIsSubtype && secondTypeIsSubtype)
else if (firstTypeIsTypeWithSubtypes && secondTypeIsSubtype)
return true;
// If execution gets here, both are subtypes, so use the order of the subtypes for the comparison.
@@ -98,12 +120,29 @@ public:
* @param typePath The type path to check
* @return `true` if it is a type with subtypes or a subtype, otherwise `false`
*/
bool IsPathOfTypeWithSubtypesOrSubtype(vector<string> const typePath) const
bool IsTypeWithSubtypesOrSubtype(vector<string> const & typePath) const
{
  // The path is taken by const reference, so a lookup doesn't copy the whole vector of strings.
  // A path matches if it is equal to one of the stored paths of types with subtypes and subtypes.
  auto const pathsEnd = m_typesAndSubtypesPaths.end();
  return ranges::find(m_typesAndSubtypesPaths.begin(), pathsEnd, typePath) != pathsEnd;
}
/**
 * Compares two given type paths based on their type relation
 * @param firstTypePath The path of the first type to compare
 * @param secondTypePath The path of the second type to compare
 * @return The result of the type based comparison of the two resolved types, or an empty optional if at least
 * one of the paths doesn't resolve to a valid type
 */
optional<bool> ComparisonResultBasedOnTypeRelation(vector<string> const & firstTypePath,
                                                   vector<string> const & secondTypePath) const
{
  auto const & classificator = classif();
  // The paths are taken by const reference and only viewed through string views, so no strings are copied.
  uint32_t const firstType =
      classificator.GetTypeByPathSafe(vector<string_view>(firstTypePath.begin(), firstTypePath.end()));
  uint32_t const secondType =
      classificator.GetTypeByPathSafe(vector<string_view>(secondTypePath.begin(), secondTypePath.end()));

  // Without two valid types there is nothing to compare.
  if (firstType == IndexAndTypeMapping::INVALID_TYPE || secondType == IndexAndTypeMapping::INVALID_TYPE)
    return {};

  return ComparisonResultBasedOnTypeRelation(firstType, secondType);
}
private:
/// Constructor
Subtypes();