mirror of
https://codeberg.org/comaps/comaps
synced 2025-12-23 14:43:43 +00:00
committed by
Konstantin Pastbin
parent
556a474fda
commit
e7c04c5459
@@ -9,8 +9,6 @@ set(SRC
|
|||||||
affiliation.hpp
|
affiliation.hpp
|
||||||
altitude_generator.cpp
|
altitude_generator.cpp
|
||||||
altitude_generator.hpp
|
altitude_generator.hpp
|
||||||
# Should precede booking_dataset.cpp because of Unity build + template instantiation order.
|
|
||||||
booking_scoring.cpp
|
|
||||||
booking_dataset.cpp
|
booking_dataset.cpp
|
||||||
booking_dataset.hpp
|
booking_dataset.hpp
|
||||||
borders.cpp
|
borders.cpp
|
||||||
@@ -126,6 +124,8 @@ set(SRC
|
|||||||
isolines_generator.hpp
|
isolines_generator.hpp
|
||||||
isolines_section_builder.cpp
|
isolines_section_builder.cpp
|
||||||
isolines_section_builder.hpp
|
isolines_section_builder.hpp
|
||||||
|
kayak_dataset.cpp
|
||||||
|
kayak_dataset.hpp
|
||||||
maxspeeds_builder.cpp
|
maxspeeds_builder.cpp
|
||||||
maxspeeds_builder.hpp
|
maxspeeds_builder.hpp
|
||||||
maxspeeds_collector.cpp
|
maxspeeds_collector.cpp
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
#include "generator/booking_dataset.hpp"
|
#include "generator/booking_dataset.hpp"
|
||||||
#include "generator/feature_builder.hpp"
|
#include "generator/feature_builder.hpp"
|
||||||
#include "generator/sponsored_scoring.hpp"
|
|
||||||
|
|
||||||
#include "indexer/classificator.hpp"
|
#include "indexer/classificator.hpp"
|
||||||
#include "indexer/ftypes_matcher.hpp"
|
#include "indexer/ftypes_matcher.hpp"
|
||||||
@@ -18,37 +17,42 @@ namespace generator
|
|||||||
using namespace feature;
|
using namespace feature;
|
||||||
|
|
||||||
// BookingHotel ------------------------------------------------------------------------------------
|
// BookingHotel ------------------------------------------------------------------------------------
|
||||||
BookingHotel::BookingHotel(std::string const & src)
|
BookingHotel::BookingHotel(std::string src)
|
||||||
{
|
{
|
||||||
|
/// @todo For fast parsing we can preprocess src (quotes) and return string_view's.
|
||||||
std::vector<std::string> rec;
|
std::vector<std::string> rec;
|
||||||
strings::ParseCSVRow(src, '\t', rec);
|
strings::ParseCSVRow(src, '\t', rec);
|
||||||
CHECK_EQUAL(rec.size(), FieldsCount(), ("Error parsing hotels.tsv line:",
|
|
||||||
boost::replace_all_copy(src, "\t", "\\t")));
|
|
||||||
|
|
||||||
CLOG(LDEBUG, strings::to_uint(rec[FieldIndex(Fields::Id)], m_id.Get()), ());
|
CHECK_EQUAL(rec.size(), Fields::Counter,
|
||||||
// TODO(mgsergio): Use ms::LatLon.
|
("Error parsing hotels entry:", boost::replace_all_copy(src, "\t", "\\t")));
|
||||||
CLOG(LDEBUG, strings::to_double(rec[FieldIndex(Fields::Latitude)], m_latLon.m_lat), ());
|
|
||||||
CLOG(LDEBUG, strings::to_double(rec[FieldIndex(Fields::Longtitude)], m_latLon.m_lon), ());
|
|
||||||
|
|
||||||
m_name = rec[FieldIndex(Fields::Name)];
|
// Assign id in the end in case of possible errors.
|
||||||
m_address = rec[FieldIndex(Fields::Address)];
|
uint32_t id;
|
||||||
|
CLOG(LDEBUG, strings::to_uint(rec[Fields::Id], id), ());
|
||||||
|
CLOG(LDEBUG, strings::to_double(rec[Fields::Latitude], m_latLon.m_lat), ());
|
||||||
|
CLOG(LDEBUG, strings::to_double(rec[Fields::Longitude], m_latLon.m_lon), ());
|
||||||
|
|
||||||
CLOG(LDEBUG, strings::to_uint(rec[FieldIndex(Fields::Stars)], m_stars), ());
|
m_name = rec[Fields::Name];
|
||||||
CLOG(LDEBUG, strings::to_uint(rec[FieldIndex(Fields::PriceCategory)], m_priceCategory), ());
|
m_address = rec[Fields::Address];
|
||||||
CLOG(LDEBUG, strings::to_double(rec[FieldIndex(Fields::RatingBooking)], m_ratingBooking), ());
|
|
||||||
CLOG(LDEBUG, strings::to_double(rec[FieldIndex(Fields::RatingUsers)], m_ratingUser), ());
|
|
||||||
|
|
||||||
m_descUrl = rec[FieldIndex(Fields::DescUrl)];
|
CLOG(LDEBUG, strings::to_uint(rec[Fields::Stars], m_stars), ());
|
||||||
|
CLOG(LDEBUG, strings::to_uint(rec[Fields::PriceCategory], m_priceCategory), ());
|
||||||
|
CLOG(LDEBUG, strings::to_double(rec[Fields::RatingBooking], m_ratingBooking), ());
|
||||||
|
CLOG(LDEBUG, strings::to_double(rec[Fields::RatingUsers], m_ratingUser), ());
|
||||||
|
|
||||||
CLOG(LDEBUG, strings::to_uint(rec[FieldIndex(Fields::Type)], m_type), ());
|
m_descUrl = rec[Fields::DescUrl];
|
||||||
|
|
||||||
m_translations = rec[FieldIndex(Fields::Translations)];
|
CLOG(LDEBUG, strings::to_uint(rec[Fields::Type], m_type), ());
|
||||||
|
|
||||||
|
m_translations = rec[Fields::Translations];
|
||||||
|
|
||||||
|
m_id.Set(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// BookingDataset ----------------------------------------------------------------------------------
|
// BookingDataset ----------------------------------------------------------------------------------
|
||||||
template <>
|
template <>
|
||||||
bool BookingDataset::NecessaryMatchingConditionHolds(FeatureBuilder const & fb) const
|
bool BookingDataset::IsSponsoredCandidate(FeatureBuilder const & fb) const
|
||||||
{
|
{
|
||||||
if (fb.GetName(StringUtf8Multilang::kDefaultCode).empty())
|
if (fb.GetName(StringUtf8Multilang::kDefaultCode).empty())
|
||||||
return false;
|
return false;
|
||||||
@@ -173,28 +177,4 @@ void BookingDataset::BuildObject(Object const & hotel, FBuilderFnT const & fn) c
|
|||||||
fn(fb);
|
fn(fb);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// @todo It looks like quite common FindMatchingObjectId function implementation.
|
|
||||||
template <>
|
|
||||||
BookingDataset::ObjectId BookingDataset::FindMatchingObjectIdImpl(FeatureBuilder const & fb) const
|
|
||||||
{
|
|
||||||
auto const name = fb.GetName(StringUtf8Multilang::kDefaultCode);
|
|
||||||
|
|
||||||
if (name.empty())
|
|
||||||
return Object::InvalidObjectId();
|
|
||||||
|
|
||||||
// Find |kMaxSelectedElements| nearest values to a point, sorted by distance?
|
|
||||||
auto const bookingIndexes = m_storage.GetNearestObjects(mercator::ToLatLon(fb.GetKeyPoint()));
|
|
||||||
|
|
||||||
/// @todo Select best candidate? Assume we match "Foo Resort SPA hotel" feature. Have candidates:
|
|
||||||
/// - "Bar SPA hotel" in 10 meters (first);
|
|
||||||
/// - "Foo SPA hotel" in 100 meters (second, but best);
|
|
||||||
/// I suspect that first "Bar hotel" will be selected (wrong).
|
|
||||||
for (auto const j : bookingIndexes)
|
|
||||||
{
|
|
||||||
if (sponsored_scoring::Match(m_storage.GetObjectById(j), fb).IsMatched())
|
|
||||||
return j;
|
|
||||||
}
|
|
||||||
|
|
||||||
return Object::InvalidObjectId();
|
|
||||||
}
|
|
||||||
} // namespace generator
|
} // namespace generator
|
||||||
|
|||||||
@@ -7,29 +7,30 @@
|
|||||||
|
|
||||||
namespace generator
|
namespace generator
|
||||||
{
|
{
|
||||||
struct BookingHotel : SponsoredObjectBase
|
class BookingHotel : public SponsoredObjectBase
|
||||||
{
|
{
|
||||||
enum class Fields
|
enum Fields
|
||||||
{
|
{
|
||||||
Id = 0,
|
Id = 0,
|
||||||
Latitude = 1,
|
Latitude,
|
||||||
Longtitude = 2,
|
Longitude,
|
||||||
Name = 3,
|
Name,
|
||||||
Address = 4,
|
Address,
|
||||||
Stars = 5,
|
Stars,
|
||||||
PriceCategory = 6,
|
PriceCategory,
|
||||||
RatingBooking = 7,
|
RatingBooking,
|
||||||
RatingUsers = 8,
|
RatingUsers,
|
||||||
DescUrl = 9,
|
DescUrl,
|
||||||
Type = 10,
|
Type,
|
||||||
Translations = 11,
|
Translations,
|
||||||
|
|
||||||
Counter
|
Counter
|
||||||
};
|
};
|
||||||
|
|
||||||
explicit BookingHotel(std::string const & src);
|
public:
|
||||||
|
explicit BookingHotel(std::string src);
|
||||||
|
|
||||||
static constexpr size_t FieldIndex(Fields field) { return SponsoredObjectBase::FieldIndex(field); }
|
static constexpr size_t FieldsCount() { return Fields::Counter; }
|
||||||
static constexpr size_t FieldsCount() { return SponsoredObjectBase::FieldsCount<Fields>(); }
|
|
||||||
|
|
||||||
uint32_t m_stars = 0;
|
uint32_t m_stars = 0;
|
||||||
uint32_t m_priceCategory = 0;
|
uint32_t m_priceCategory = 0;
|
||||||
@@ -37,6 +38,7 @@ struct BookingHotel : SponsoredObjectBase
|
|||||||
double m_ratingUser = 0.0;
|
double m_ratingUser = 0.0;
|
||||||
uint32_t m_type = 0;
|
uint32_t m_type = 0;
|
||||||
std::string m_translations;
|
std::string m_translations;
|
||||||
|
std::string m_descUrl;
|
||||||
};
|
};
|
||||||
|
|
||||||
using BookingDataset = SponsoredDataset<BookingHotel>;
|
using BookingDataset = SponsoredDataset<BookingHotel>;
|
||||||
|
|||||||
@@ -1,22 +1,18 @@
|
|||||||
#include "generator/booking_dataset.hpp"
|
//#include "generator/booking_dataset.hpp"
|
||||||
|
|
||||||
#include "generator/feature_builder.hpp"
|
#include "generator/feature_builder.hpp"
|
||||||
|
#include "generator/feature_maker.hpp"
|
||||||
//#include "generator/opentable_dataset.hpp"
|
//#include "generator/opentable_dataset.hpp"
|
||||||
|
#include "generator/kayak_dataset.hpp"
|
||||||
#include "generator/osm_source.hpp"
|
#include "generator/osm_source.hpp"
|
||||||
#include "generator/processor_booking.hpp"
|
|
||||||
#include "generator/raw_generator.hpp"
|
#include "generator/raw_generator.hpp"
|
||||||
#include "generator/sponsored_scoring.hpp"
|
#include "generator/sponsored_dataset_inl.hpp"
|
||||||
#include "generator/translator_collection.hpp"
|
#include "generator/translator.hpp"
|
||||||
#include "generator/translator_factory.hpp"
|
|
||||||
|
|
||||||
#include "indexer/classificator_loader.hpp"
|
#include "indexer/classificator_loader.hpp"
|
||||||
|
|
||||||
#include "geometry/distance_on_sphere.hpp"
|
|
||||||
|
|
||||||
#include "base/file_name_utils.hpp"
|
#include "base/file_name_utils.hpp"
|
||||||
#include "base/exception.hpp"
|
#include "base/exception.hpp"
|
||||||
#include "base/geo_object_id.hpp"
|
#include "base/geo_object_id.hpp"
|
||||||
#include "base/stl_helpers.hpp"
|
|
||||||
#include "base/string_utils.hpp"
|
#include "base/string_utils.hpp"
|
||||||
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
@@ -39,7 +35,7 @@ DEFINE_string(factors, "", "Factors output path");
|
|||||||
DEFINE_string(sample, "", "Path so sample file");
|
DEFINE_string(sample, "", "Path so sample file");
|
||||||
|
|
||||||
DEFINE_uint64(seed, minstd_rand::default_seed, "Seed for random shuffle");
|
DEFINE_uint64(seed, minstd_rand::default_seed, "Seed for random shuffle");
|
||||||
DEFINE_uint64(selection_size, 1000, "Selection size");
|
DEFINE_uint64(selection_size, 10000, "Selection size");
|
||||||
DEFINE_bool(generate, false, "Generate unmarked sample");
|
DEFINE_bool(generate, false, "Generate unmarked sample");
|
||||||
|
|
||||||
using namespace generator;
|
using namespace generator;
|
||||||
@@ -100,7 +96,7 @@ GenerateInfo GetGenerateInfo()
|
|||||||
info.SetNodeStorageType("map");
|
info.SetNodeStorageType("map");
|
||||||
info.SetOsmFileType("o5m");
|
info.SetOsmFileType("o5m");
|
||||||
|
|
||||||
info.m_intermediateDir = base::GetDirectory(FLAGS_factors);
|
info.m_cacheDir = info.m_intermediateDir = base::GetDirectory(FLAGS_osm);
|
||||||
|
|
||||||
// Set other info params here.
|
// Set other info params here.
|
||||||
|
|
||||||
@@ -189,36 +185,37 @@ vector<SampleItem<Object>> ReadSampleFromFile(string const & name)
|
|||||||
return ReadSample<Object>(ist);
|
return ReadSample<Object>(ist);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void PrintOsmUrl(std::ostream & os, ms::LatLon const & ll)
|
||||||
|
{
|
||||||
|
os << "# URL: https://www.openstreetmap.org/?mlat=" << ll.m_lat << "&mlon=" << ll.m_lon
|
||||||
|
<< "#map=18/" << ll.m_lat << "/" << ll.m_lon << endl;
|
||||||
|
};
|
||||||
|
|
||||||
template <typename Dataset, typename Object = typename Dataset::Object>
|
template <typename Dataset, typename Object = typename Dataset::Object>
|
||||||
void GenerateFactors(Dataset const & dataset,
|
void GenerateFactors(Dataset const & dataset,
|
||||||
map<base::GeoObjectId, FeatureBuilder> const & features,
|
map<base::GeoObjectId, FeatureBuilder> const & features,
|
||||||
vector<SampleItem<Object>> const & sampleItems, ostream & ost)
|
vector<SampleItem<Object>> const & sampleItems, ostream & ost)
|
||||||
{
|
{
|
||||||
|
ost << fixed << setprecision(6);
|
||||||
|
|
||||||
for (auto const & item : sampleItems)
|
for (auto const & item : sampleItems)
|
||||||
{
|
{
|
||||||
auto const & object = dataset.GetStorage().GetObjectById(item.m_sponsoredId);
|
auto const & object = dataset.GetStorage().GetObjectById(item.m_sponsoredId);
|
||||||
auto const & feature = features.at(item.m_osmId);
|
auto const & feature = features.at(item.m_osmId);
|
||||||
|
|
||||||
auto const score = generator::sponsored_scoring::Match(object, feature);
|
auto const score = dataset.CalcScore(object, feature);
|
||||||
|
|
||||||
auto const center = mercator::ToLatLon(feature.GetKeyPoint());
|
ost << "# ------------------------------------------" << endl;
|
||||||
double const distanceMeters = ms::DistanceOnEarth(center, object.m_latLon);
|
ost << (score.IsMatched() ? "YES" : "NO") << "\t" << DebugPrint(feature.GetMostGenericOsmId())
|
||||||
auto const matched = score.IsMatched();
|
<< "\t" << object.m_id
|
||||||
|
<< "\tdistance: " << score.m_distance
|
||||||
ost << "# ------------------------------------------" << fixed << setprecision(6)
|
|
||||||
<< endl;
|
|
||||||
ost << (matched ? 'y' : 'n') << " \t" << DebugPrint(feature.GetMostGenericOsmId())
|
|
||||||
<< "\t " << object.m_id
|
|
||||||
<< "\tdistance: " << distanceMeters
|
|
||||||
<< "\tdistance score: " << score.m_linearNormDistanceScore
|
<< "\tdistance score: " << score.m_linearNormDistanceScore
|
||||||
<< "\tname score: " << score.m_nameSimilarityScore
|
<< "\tname score: " << score.m_nameSimilarityScore
|
||||||
<< "\tresult score: " << score.GetMatchingScore()
|
<< "\tresult score: " << score.GetMatchingScore()
|
||||||
<< endl;
|
<< endl;
|
||||||
ost << "# " << PrintBuilder(feature) << endl;
|
ost << "# " << PrintBuilder(feature) << endl;
|
||||||
ost << "# " << object << endl;
|
ost << "# " << object << endl;
|
||||||
ost << "# URL: https://www.openstreetmap.org/?mlat="
|
PrintOsmUrl(ost, object.m_latLon);
|
||||||
<< object.m_latLon.m_lat << "&mlon=" << object.m_latLon.m_lon << "#map=18/"
|
|
||||||
<< object.m_latLon.m_lat << "/" << object.m_latLon.m_lon << endl;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -241,38 +238,33 @@ void GenerateSample(Dataset const & dataset,
|
|||||||
if (FLAGS_selection_size < elementIndexes.size())
|
if (FLAGS_selection_size < elementIndexes.size())
|
||||||
elementIndexes.resize(FLAGS_selection_size);
|
elementIndexes.resize(FLAGS_selection_size);
|
||||||
|
|
||||||
stringstream outStream;
|
ost << fixed << setprecision(6);
|
||||||
|
|
||||||
for (auto osmId : elementIndexes)
|
for (auto osmId : elementIndexes)
|
||||||
{
|
{
|
||||||
auto const & fb = features.at(osmId);
|
auto const & fb = features.at(osmId);
|
||||||
auto const sponsoredIndexes = dataset.GetStorage().GetNearestObjects(mercator::ToLatLon(fb.GetKeyPoint()));
|
auto const ll = mercator::ToLatLon(fb.GetKeyPoint());
|
||||||
|
auto const sponsoredIndexes = dataset.GetStorage().GetNearestObjects(ll);
|
||||||
|
|
||||||
|
ost << "# ------------------------------------------" << endl
|
||||||
|
<< "# " << PrintBuilder(fb) << endl;
|
||||||
|
PrintOsmUrl(ost, ll);
|
||||||
|
|
||||||
for (auto const sponsoredId : sponsoredIndexes)
|
for (auto const sponsoredId : sponsoredIndexes)
|
||||||
{
|
{
|
||||||
auto const & object = dataset.GetStorage().GetObjectById(sponsoredId);
|
auto const & object = dataset.GetStorage().GetObjectById(sponsoredId);
|
||||||
auto const score = sponsored_scoring::Match(object, fb);
|
auto const score = dataset.CalcScore(object, fb);
|
||||||
|
|
||||||
auto const center = mercator::ToLatLon(fb.GetKeyPoint());
|
ost << (score.IsMatched() ? "YES" : "NO") << "\t" << sponsoredId
|
||||||
double const distanceMeters = ms::DistanceOnEarth(center, object.m_latLon);
|
<< "\tdistance: " << score.m_distance
|
||||||
auto const matched = score.IsMatched();
|
|
||||||
|
|
||||||
ost << "# ------------------------------------------" << fixed << setprecision(6)
|
|
||||||
<< endl;
|
|
||||||
ost << (matched ? 'y' : 'n') << " \t" << DebugPrint(osmId) << "\t " << sponsoredId
|
|
||||||
<< "\tdistance: " << distanceMeters
|
|
||||||
<< "\tdistance score: " << score.m_linearNormDistanceScore
|
<< "\tdistance score: " << score.m_linearNormDistanceScore
|
||||||
<< "\tname score: " << score.m_nameSimilarityScore
|
<< "\tname score: " << score.m_nameSimilarityScore
|
||||||
<< "\tresult score: " << score.GetMatchingScore()
|
<< "\tresult score: " << score.GetMatchingScore()
|
||||||
<< endl;
|
<< endl
|
||||||
ost << "# " << PrintBuilder(fb) << endl;
|
<< "# " << object << endl;
|
||||||
ost << "# " << object << endl;
|
PrintOsmUrl(ost, object.m_latLon);
|
||||||
ost << "# URL: https://www.openstreetmap.org/?mlat="
|
|
||||||
<< object.m_latLon.m_lat << "&mlon=" << object.m_latLon.m_lon
|
|
||||||
<< "#map=18/" << object.m_latLon.m_lat << "/" << object.m_latLon.m_lon << endl;
|
|
||||||
}
|
}
|
||||||
if (!sponsoredIndexes.empty())
|
|
||||||
ost << endl << endl;
|
ost << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -280,7 +272,7 @@ template <typename Dataset>
|
|||||||
string GetDatasetFilePath(GenerateInfo const & info);
|
string GetDatasetFilePath(GenerateInfo const & info);
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
string GetDatasetFilePath<BookingDataset>(GenerateInfo const & info)
|
string GetDatasetFilePath<KayakDataset>(GenerateInfo const & info)
|
||||||
{
|
{
|
||||||
return info.m_bookingDataFilename;
|
return info.m_bookingDataFilename;
|
||||||
}
|
}
|
||||||
@@ -291,6 +283,75 @@ string GetDatasetFilePath<BookingDataset>(GenerateInfo const & info)
|
|||||||
// return info.m_opentableDataFilename;
|
// return info.m_opentableDataFilename;
|
||||||
//}
|
//}
|
||||||
|
|
||||||
|
class TranslatorMock : public Translator
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
TranslatorMock(std::shared_ptr<FeatureProcessorInterface> const & processor,
|
||||||
|
std::shared_ptr<generator::cache::IntermediateData> const & cache)
|
||||||
|
: Translator(processor, cache, std::make_shared<FeatureMakerSimple>(cache->GetCache()))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @name TranslatorInterface overrides.
|
||||||
|
/// @{
|
||||||
|
std::shared_ptr<TranslatorInterface> Clone() const override
|
||||||
|
{
|
||||||
|
UNREACHABLE();
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
void Merge(TranslatorInterface const &) override
|
||||||
|
{
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
/// @}
|
||||||
|
};
|
||||||
|
|
||||||
|
class AggregateProcessor : public FeatureProcessorInterface
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
/// @name FeatureProcessorInterface overrides.
|
||||||
|
/// @{
|
||||||
|
std::shared_ptr<FeatureProcessorInterface> Clone() const override
|
||||||
|
{
|
||||||
|
UNREACHABLE();
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
void Process(feature::FeatureBuilder & fb) override
|
||||||
|
{
|
||||||
|
auto const id = fb.GetMostGenericOsmId();
|
||||||
|
m_features.emplace(id, std::move(fb));
|
||||||
|
}
|
||||||
|
void Finish() override {}
|
||||||
|
/// @}
|
||||||
|
|
||||||
|
std::map<base::GeoObjectId, feature::FeatureBuilder> m_features;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class Dataset> class DatasetFilter : public FilterInterface
|
||||||
|
{
|
||||||
|
Dataset const & m_dataset;
|
||||||
|
public:
|
||||||
|
DatasetFilter(Dataset const & dataset) : m_dataset(dataset) {}
|
||||||
|
|
||||||
|
/// @name FilterInterface overrides.
|
||||||
|
/// @{
|
||||||
|
std::shared_ptr<FilterInterface> Clone() const override
|
||||||
|
{
|
||||||
|
UNREACHABLE();
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
bool IsAccepted(OsmElement const & e) const override
|
||||||
|
{
|
||||||
|
// All hotels under tourism tag.
|
||||||
|
return !e.GetTag("tourism").empty();
|
||||||
|
}
|
||||||
|
bool IsAccepted(feature::FeatureBuilder const & fb) const override
|
||||||
|
{
|
||||||
|
return m_dataset.IsSponsoredCandidate(fb);
|
||||||
|
}
|
||||||
|
/// @}
|
||||||
|
};
|
||||||
|
|
||||||
template <typename Dataset, typename Object = typename Dataset::Object>
|
template <typename Dataset, typename Object = typename Dataset::Object>
|
||||||
void RunImpl(GenerateInfo & info)
|
void RunImpl(GenerateInfo & info)
|
||||||
{
|
{
|
||||||
@@ -298,16 +359,17 @@ void RunImpl(GenerateInfo & info)
|
|||||||
Dataset dataset(dataSetFilePath);
|
Dataset dataset(dataSetFilePath);
|
||||||
LOG_SHORT(LINFO, (dataset.GetStorage().Size(), "objects are loaded from a file:", dataSetFilePath));
|
LOG_SHORT(LINFO, (dataset.GetStorage().Size(), "objects are loaded from a file:", dataSetFilePath));
|
||||||
|
|
||||||
map<base::GeoObjectId, FeatureBuilder> features;
|
|
||||||
LOG_SHORT(LINFO, ("OSM data:", FLAGS_osm));
|
LOG_SHORT(LINFO, ("OSM data:", FLAGS_osm));
|
||||||
|
|
||||||
generator::cache::IntermediateDataObjectsCache objectsCache;
|
generator::cache::IntermediateDataObjectsCache objectsCache;
|
||||||
generator::cache::IntermediateData cacheLoader(objectsCache, info);
|
auto cache = std::make_shared<generator::cache::IntermediateData>(objectsCache, info);
|
||||||
auto translators = make_shared<TranslatorCollection>();
|
auto processor = make_shared<AggregateProcessor>();
|
||||||
auto processor = make_shared<ProcessorBooking<Dataset>>(dataset, features);
|
auto translator = std::make_shared<TranslatorMock>(processor, cache);
|
||||||
translators->Append(CreateTranslator(TranslatorType::Country, processor, cacheLoader.GetCache(), info));
|
translator->SetFilter(std::make_shared<DatasetFilter<Dataset>>(dataset));
|
||||||
|
|
||||||
RawGenerator generator(info);
|
RawGenerator generator(info);
|
||||||
generator.GenerateCustom(translators);
|
generator.GenerateCustom(translator);
|
||||||
|
CHECK(generator.Execute(), ());
|
||||||
|
|
||||||
if (FLAGS_generate)
|
if (FLAGS_generate)
|
||||||
{
|
{
|
||||||
@@ -319,7 +381,7 @@ void RunImpl(GenerateInfo & info)
|
|||||||
CHECK(ofst->is_open(), ("Can't open file", FLAGS_sample, strerror(errno)));
|
CHECK(ofst->is_open(), ("Can't open file", FLAGS_sample, strerror(errno)));
|
||||||
ost = ofst.get();
|
ost = ofst.get();
|
||||||
}
|
}
|
||||||
GenerateSample(dataset, features, *ost);
|
GenerateSample(dataset, processor->m_features, *ost);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -327,7 +389,7 @@ void RunImpl(GenerateInfo & info)
|
|||||||
LOG_SHORT(LINFO, ("Sample size is", sample.size()));
|
LOG_SHORT(LINFO, ("Sample size is", sample.size()));
|
||||||
ofstream ost(FLAGS_factors);
|
ofstream ost(FLAGS_factors);
|
||||||
CHECK(ost.is_open(), ("Can't open file", FLAGS_factors, strerror(errno)));
|
CHECK(ost.is_open(), ("Can't open file", FLAGS_factors, strerror(errno)));
|
||||||
GenerateFactors<Dataset>(dataset, features, sample, ost);
|
GenerateFactors<Dataset>(dataset, processor->m_features, sample, ost);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -335,7 +397,7 @@ void Run(DatasetType const datasetType, GenerateInfo & info)
|
|||||||
{
|
{
|
||||||
switch (datasetType)
|
switch (datasetType)
|
||||||
{
|
{
|
||||||
case DatasetType::Booking: RunImpl<BookingDataset>(info); break;
|
case DatasetType::Booking: RunImpl<KayakDataset>(info); break;
|
||||||
//case DatasetType::Opentable: RunImpl<OpentableDataset>(info); break;
|
//case DatasetType::Opentable: RunImpl<OpentableDataset>(info); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,51 +0,0 @@
|
|||||||
#include "generator/sponsored_scoring.hpp"
|
|
||||||
|
|
||||||
#include "generator/booking_dataset.hpp"
|
|
||||||
#include "generator/feature_builder.hpp"
|
|
||||||
|
|
||||||
#include "geometry/mercator.hpp"
|
|
||||||
|
|
||||||
|
|
||||||
namespace
|
|
||||||
{
|
|
||||||
// Calculated with tools/python/booking_hotels_quality.py.
|
|
||||||
double constexpr kOptimalThreshold = 0.304875;
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
namespace generator
|
|
||||||
{
|
|
||||||
namespace sponsored_scoring
|
|
||||||
{
|
|
||||||
template <>
|
|
||||||
double MatchStats<BookingHotel>::GetMatchingScore() const
|
|
||||||
{
|
|
||||||
// TODO(mgsergio): Use tuner to get optimal function.
|
|
||||||
return m_linearNormDistanceScore * m_nameSimilarityScore;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
bool MatchStats<BookingHotel>::IsMatched() const
|
|
||||||
{
|
|
||||||
return GetMatchingScore() > kOptimalThreshold;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// @todo It looks like quite common Match function implementation,
|
|
||||||
/// because GetLatLon and GetName() needed.
|
|
||||||
template <>
|
|
||||||
MatchStats<BookingHotel> Match(BookingHotel const & h, feature::FeatureBuilder const & fb)
|
|
||||||
{
|
|
||||||
MatchStats<BookingHotel> score;
|
|
||||||
|
|
||||||
auto const fbCenter = mercator::ToLatLon(fb.GetKeyPoint());
|
|
||||||
auto const distance = ms::DistanceOnEarth(fbCenter, h.m_latLon);
|
|
||||||
score.m_linearNormDistanceScore =
|
|
||||||
impl::GetLinearNormDistanceScore(distance, BookingDataset::kDistanceLimitInMeters);
|
|
||||||
|
|
||||||
// TODO(mgsergio): Check all translations and use the best one.
|
|
||||||
score.m_nameSimilarityScore = impl::GetNameSimilarityScore(
|
|
||||||
h.m_name, std::string(fb.GetName(StringUtf8Multilang::kDefaultCode)));
|
|
||||||
|
|
||||||
return score;
|
|
||||||
}
|
|
||||||
} // namespace sponsored_scoring
|
|
||||||
} // namespace generator
|
|
||||||
@@ -3,7 +3,8 @@
|
|||||||
#include "generator/addresses_collector.hpp"
|
#include "generator/addresses_collector.hpp"
|
||||||
#include "generator/address_enricher.hpp"
|
#include "generator/address_enricher.hpp"
|
||||||
#include "generator/affiliation.hpp"
|
#include "generator/affiliation.hpp"
|
||||||
#include "generator/booking_dataset.hpp"
|
//#include "generator/booking_dataset.hpp"
|
||||||
|
#include "generator/kayak_dataset.hpp"
|
||||||
#include "generator/coastlines_generator.hpp"
|
#include "generator/coastlines_generator.hpp"
|
||||||
#include "generator/feature_builder.hpp"
|
#include "generator/feature_builder.hpp"
|
||||||
#include "generator/final_processor_utils.hpp"
|
#include "generator/final_processor_utils.hpp"
|
||||||
@@ -97,7 +98,8 @@ void CountryFinalProcessor::Order()
|
|||||||
|
|
||||||
void CountryFinalProcessor::ProcessBooking()
|
void CountryFinalProcessor::ProcessBooking()
|
||||||
{
|
{
|
||||||
BookingDataset dataset(m_hotelsFilename);
|
KayakDataset dataset(m_hotelsFilename);
|
||||||
|
LOG(LINFO, ("Loaded", dataset.GetStorage().Size(), "hotels from", m_hotelsFilename));
|
||||||
|
|
||||||
std::ofstream matchingLogStream;
|
std::ofstream matchingLogStream;
|
||||||
matchingLogStream.exceptions(std::fstream::failbit | std::fstream::badbit);
|
matchingLogStream.exceptions(std::fstream::failbit | std::fstream::badbit);
|
||||||
@@ -110,38 +112,46 @@ void CountryFinalProcessor::ProcessBooking()
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
std::stringstream sstream;
|
std::stringstream sstream;
|
||||||
|
sstream << std::fixed << std::setprecision(7);
|
||||||
|
|
||||||
|
size_t total = 0, matched = 0;
|
||||||
|
|
||||||
FeatureBuilderWriter<serialization_policy::MaxAccuracy> writer(path, true /* mangleName */);
|
FeatureBuilderWriter<serialization_policy::MaxAccuracy> writer(path, true /* mangleName */);
|
||||||
ForEachFeatureRawFormat<serialization_policy::MaxAccuracy>(path, [&](FeatureBuilder && fb, uint64_t)
|
ForEachFeatureRawFormat<serialization_policy::MaxAccuracy>(path, [&](FeatureBuilder && fb, uint64_t)
|
||||||
{
|
{
|
||||||
|
bool hotelProcessed = false;
|
||||||
|
if (dataset.IsSponsoredCandidate(fb))
|
||||||
|
{
|
||||||
|
++total;
|
||||||
auto const id = dataset.FindMatchingObjectId(fb);
|
auto const id = dataset.FindMatchingObjectId(fb);
|
||||||
if (id == BookingHotel::InvalidObjectId())
|
if (id != KayakHotel::InvalidObjectId())
|
||||||
{
|
|
||||||
writer.Write(fb);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
|
++matched;
|
||||||
|
hotelProcessed = true;
|
||||||
|
|
||||||
dataset.PreprocessMatchedOsmObject(id, fb, [&](FeatureBuilder & newFeature)
|
dataset.PreprocessMatchedOsmObject(id, fb, [&](FeatureBuilder & newFeature)
|
||||||
{
|
{
|
||||||
if (newFeature.PreSerialize())
|
if (newFeature.PreSerialize())
|
||||||
writer.Write(newFeature);
|
writer.Write(newFeature);
|
||||||
});
|
});
|
||||||
}
|
|
||||||
|
|
||||||
auto const & isHotelChecker = ftypes::IsHotelChecker::Instance();
|
|
||||||
if (isHotelChecker(fb.GetTypes()))
|
|
||||||
{
|
|
||||||
if (id != BookingHotel::InvalidObjectId())
|
|
||||||
sstream << id;
|
sstream << id;
|
||||||
|
|
||||||
auto const latLon = mercator::ToLatLon(fb.GetKeyPoint());
|
|
||||||
sstream << ',' << fb.GetMostGenericOsmId().GetEncodedId() << ','
|
|
||||||
<< strings::to_string_dac(latLon.m_lat, 7) << ','
|
|
||||||
<< strings::to_string_dac(latLon.m_lon, 7) << ',' << name << '\n';
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
sstream << "NO";
|
||||||
|
|
||||||
|
auto const ll = mercator::ToLatLon(fb.GetKeyPoint());
|
||||||
|
sstream << ",\t" << DebugPrint(fb.GetMostGenericOsmId()) << ",\t" << ll.m_lat << ',' << ll.m_lon << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!hotelProcessed)
|
||||||
|
writer.Write(fb);
|
||||||
});
|
});
|
||||||
|
|
||||||
std::lock_guard guard(m);
|
std::lock_guard guard(m);
|
||||||
matchingLogStream << sstream.str();
|
matchingLogStream << sstream.str();
|
||||||
|
LOG(LINFO, ("Hotels (MWM, total, matched):", name, total, matched));
|
||||||
|
|
||||||
}, m_threadsCount);
|
}, m_threadsCount);
|
||||||
|
|
||||||
std::vector<FeatureBuilder> fbs;
|
std::vector<FeatureBuilder> fbs;
|
||||||
|
|||||||
@@ -43,6 +43,9 @@ std::vector<std::vector<std::string>> AppendToMwmTmp(std::vector<feature::Featur
|
|||||||
feature::AffiliationInterface const & affiliation,
|
feature::AffiliationInterface const & affiliation,
|
||||||
std::string const & temporaryMwmPath, size_t threadsCount = 1)
|
std::string const & temporaryMwmPath, size_t threadsCount = 1)
|
||||||
{
|
{
|
||||||
|
if (fbs.empty())
|
||||||
|
return {};
|
||||||
|
|
||||||
auto affiliations = GetAffiliations(fbs, affiliation, threadsCount);
|
auto affiliations = GetAffiliations(fbs, affiliation, threadsCount);
|
||||||
std::unordered_map<std::string, std::vector<size_t>> countryToFbsIndexes;
|
std::unordered_map<std::string, std::vector<size_t>> countryToFbsIndexes;
|
||||||
for (size_t i = 0; i < fbs.size(); ++i)
|
for (size_t i = 0; i < fbs.size(); ++i)
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ set(SRC
|
|||||||
source_data.hpp
|
source_data.hpp
|
||||||
source_to_element_test.cpp
|
source_to_element_test.cpp
|
||||||
speed_cameras_test.cpp
|
speed_cameras_test.cpp
|
||||||
|
sponsored_scoring_tests.cpp
|
||||||
srtm_parser_test.cpp
|
srtm_parser_test.cpp
|
||||||
tag_admixer_test.cpp
|
tag_admixer_test.cpp
|
||||||
tesselator_test.cpp
|
tesselator_test.cpp
|
||||||
|
|||||||
41
generator/generator_tests/sponsored_scoring_tests.cpp
Normal file
41
generator/generator_tests/sponsored_scoring_tests.cpp
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
#include "testing/testing.hpp"
|
||||||
|
|
||||||
|
#include "generator/sponsored_scoring.hpp"
|
||||||
|
|
||||||
|
#include "geometry/distance_on_sphere.hpp"
|
||||||
|
#include "geometry/latlon.hpp"
|
||||||
|
|
||||||
|
namespace sponsored_scoring_tests
|
||||||
|
{
|
||||||
|
|
||||||
|
generator::sponsored::MatchStats GetMatch(ms::LatLon osmLL, std::string const & osmName,
|
||||||
|
ms::LatLon hotelLL, std::string const & hotelName)
|
||||||
|
{
|
||||||
|
// The same as SponsoredDataset::kDistanceLimitMeters
|
||||||
|
return { ms::DistanceOnEarth(osmLL, hotelLL), 150.0, hotelName, osmName };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks the matching verdict on several OSM-object / hotel pairs located in Paris.
UNIT_TEST(SponsoredScoring_Paris)
{
  // Shorthand for "do these two objects match?".
  auto const isMatched = [](ms::LatLon osmLL, std::string const & osmName,
                            ms::LatLon hotelLL, std::string const & hotelName)
  {
    return GetMatch(osmLL, osmName, hotelLL, hotelName).IsMatched();
  };

  TEST(!isMatched({48.8474633, 2.3712106}, "Hôtel de Marseille",
                  {48.8473730, 2.3712020}, "Hotel Riesner"), ());

  TEST(isMatched({48.8760697, 2.3456749}, "Holiday Villa",
                 {48.8761570, 2.3455750}, "Hotel Villa Lafayette Paris IX"), ());

  TEST(isMatched({48.8664199, 2.2892440}, "Hôtel Baltimore",
                 {48.8663780, 2.2895710}, "Sofitel Paris Baltimore Tour Eiffel"), ());

  TEST(!isMatched({48.8808205, 2.3517253}, "Grand Hotel Magenta",
                  {48.8806950, 2.3521320}, "Hotel Cambrai"), ());

  // But may be false on the ground.
  TEST(isMatched({48.8733283, 2.3004615}, "Hôtel Balzac",
                 {48.8735222, 2.3004904}, "Apart Inn Paris - Balzac"), ());

  TEST(!isMatched({48.8470895, 2.3710844}, "Hôtel Mignon",
                  {48.8473730, 2.3712020}, "Hotel Riesner"), ());
}
|
||||||
|
|
||||||
|
} // namespace sponsored_scoring_tests
|
||||||
74
generator/kayak_dataset.cpp
Normal file
74
generator/kayak_dataset.cpp
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
#include "generator/kayak_dataset.hpp"
|
||||||
|
|
||||||
|
#include "generator/feature_builder.hpp"
|
||||||
|
|
||||||
|
#include "indexer/ftypes_matcher.hpp"
|
||||||
|
|
||||||
|
#include "base/logging.hpp"
|
||||||
|
#include "base/string_utils.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
namespace generator
|
||||||
|
{
|
||||||
|
using namespace feature;
|
||||||
|
|
||||||
|
// KayakHotel --------------------------------------------------------------------------------------
|
||||||
|
/// Parses one CSV line into a hotel record.
///
/// The object is left with an invalid |m_id| (the default) when the line is the header,
/// has an unexpected number of fields, or when the hotel id or either coordinate
/// cannot be parsed. A rating that cannot be parsed is replaced by |kInvalidRating|.
///
/// @param src One raw CSV line; taken by value because it may be patched in place.
KayakHotel::KayakHotel(std::string src)
{
  using namespace strings;

  // Patch strange entries.
  if (src.starts_with("\","))
    src.erase(0, 1);

  /// @todo For fast parsing we can preprocess src (quotes) and return string_view's.
  std::vector<std::string> rec;
  ParseCSVRow(src, ',', rec);

  // Skip bad entries and header.
  if (rec.size() != Fields::Counter || rec[0] == "ChainID")
    return;

  // Parse every mandatory field into a local first and bail out on the first failure,
  // so that a broken line never yields a record with a garbage id or coordinates.
  uint32_t id = 0;
  if (!to_uint(rec[Fields::KayakHotelID], id))  /// @todo HotelID ?
  {
    LOG(LDEBUG, ("Can't parse KayakHotelID:", rec[Fields::KayakHotelID]));
    return;
  }

  double lat = 0.0;
  if (!to_double(rec[Fields::Latitude], lat))
  {
    LOG(LDEBUG, ("Can't parse Latitude:", rec[Fields::Latitude]));
    return;
  }

  double lon = 0.0;
  if (!to_double(rec[Fields::Longitude], lon))
  {
    LOG(LDEBUG, ("Can't parse Longitude:", rec[Fields::Longitude]));
    return;
  }

  m_latLon.m_lat = lat;
  m_latLon.m_lon = lon;

  // The rating is optional: fall back to the "no rating" marker.
  if (!to_double(rec[Fields::OverallRating], m_overallRating))
    m_overallRating = kInvalidRating;

  m_name = rec[Fields::HotelName];
  m_address = rec[Fields::HotelAddress];

  // Assign id in the end: a valid id marks a completely parsed record.
  m_id.Set(id);
}
|
||||||
|
|
||||||
|
|
||||||
|
// KayakDataset ----------------------------------------------------------------------------------
|
||||||
|
template <>
|
||||||
|
bool KayakDataset::IsSponsoredCandidate(FeatureBuilder const & fb) const
|
||||||
|
{
|
||||||
|
if (fb.GetName(StringUtf8Multilang::kDefaultCode).empty())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return ftypes::IsHotelChecker::Instance()(fb.GetTypes());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void KayakDataset::PreprocessMatchedOsmObject(ObjectId id, FeatureBuilder & fb, FBuilderFnT const fn) const
|
||||||
|
{
|
||||||
|
auto const & hotel = m_storage.GetObjectById(id);
|
||||||
|
|
||||||
|
fb.SetHotelInfo(Metadata::SRC_KAYAK, hotel.m_id.Get(), hotel.m_overallRating, 0 /* priceCategory */);
|
||||||
|
|
||||||
|
fn(fb);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void KayakDataset::BuildObject(Object const &, FBuilderFnT const &) const
|
||||||
|
{
|
||||||
|
// Don't create new objects.
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace generator
|
||||||
66
generator/kayak_dataset.hpp
Normal file
66
generator/kayak_dataset.hpp
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "generator/sponsored_dataset.hpp"
|
||||||
|
#include "generator/sponsored_object_base.hpp"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace generator
|
||||||
|
{
|
||||||
|
class KayakHotel : public SponsoredObjectBase
|
||||||
|
{
|
||||||
|
enum Fields
|
||||||
|
{
|
||||||
|
ChainID = 0,
|
||||||
|
ChainName,
|
||||||
|
Checkin,
|
||||||
|
Checkout,
|
||||||
|
CountryCode,
|
||||||
|
CountryFileName,
|
||||||
|
CountryName,
|
||||||
|
CurrencyCode,
|
||||||
|
DateCreated,
|
||||||
|
Facilities,
|
||||||
|
HotelAddress,
|
||||||
|
HotelFileName,
|
||||||
|
HotelID,
|
||||||
|
HotelName,
|
||||||
|
HotelPostcode,
|
||||||
|
IataPlaceCode,
|
||||||
|
ImageID,
|
||||||
|
KayakHotelID,
|
||||||
|
LastUpdated,
|
||||||
|
Latitude,
|
||||||
|
Longitude,
|
||||||
|
MinRate,
|
||||||
|
OverallRating,
|
||||||
|
PlaceFileName,
|
||||||
|
PlaceID,
|
||||||
|
PlaceName,
|
||||||
|
PlaceType,
|
||||||
|
Popularity,
|
||||||
|
PropertyType,
|
||||||
|
PropertyTypeID,
|
||||||
|
SelfRated,
|
||||||
|
StarRating,
|
||||||
|
StateName,
|
||||||
|
StatePlaceID,
|
||||||
|
StatePlacefilename,
|
||||||
|
Themes,
|
||||||
|
Trademarked,
|
||||||
|
TransliteratedHotelName,
|
||||||
|
|
||||||
|
Counter
|
||||||
|
};
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit KayakHotel(std::string src);
|
||||||
|
|
||||||
|
static constexpr size_t FieldsCount() { return Fields::Counter; }
|
||||||
|
|
||||||
|
static double constexpr kInvalidRating = 0;
|
||||||
|
double m_overallRating = kInvalidRating;
|
||||||
|
};
|
||||||
|
|
||||||
|
using KayakDataset = SponsoredDataset<KayakHotel>;
|
||||||
|
} // namespace generator
|
||||||
@@ -1,46 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "generator/feature_builder.hpp"
|
|
||||||
#include "generator/feature_generator.hpp"
|
|
||||||
#include "generator/processor_interface.hpp"
|
|
||||||
|
|
||||||
#include "indexer/feature_data.hpp"
|
|
||||||
|
|
||||||
#include "base/assert.hpp"
|
|
||||||
#include "base/geo_object_id.hpp"
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
#include <memory>
|
|
||||||
|
|
||||||
namespace generator
|
|
||||||
{
|
|
||||||
template <typename Dataset>
|
|
||||||
class ProcessorBooking : public FeatureProcessorInterface
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
ProcessorBooking(Dataset const & dataset,
|
|
||||||
std::map<base::GeoObjectId, feature::FeatureBuilder> & features)
|
|
||||||
: m_dataset(dataset), m_features(features)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
// FeatureProcessorInterface overrides:
|
|
||||||
std::shared_ptr<FeatureProcessorInterface> Clone() const override
|
|
||||||
{
|
|
||||||
CHECK(false, ());
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
void Process(feature::FeatureBuilder & fb) override
|
|
||||||
{
|
|
||||||
if (m_dataset.NecessaryMatchingConditionHolds(fb))
|
|
||||||
m_features.emplace(fb.GetMostGenericOsmId(), fb);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Finish() override {}
|
|
||||||
|
|
||||||
private:
|
|
||||||
Dataset const & m_dataset;
|
|
||||||
std::map<base::GeoObjectId, feature::FeatureBuilder> & m_features;
|
|
||||||
};
|
|
||||||
} // namespace generator
|
|
||||||
@@ -1,7 +1,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "generator/factory_utils.hpp"
|
#include "generator/factory_utils.hpp"
|
||||||
#include "generator/processor_booking.hpp"
|
|
||||||
#include "generator/processor_coastline.hpp"
|
#include "generator/processor_coastline.hpp"
|
||||||
//#include "generator/processor_complex.hpp"
|
//#include "generator/processor_complex.hpp"
|
||||||
#include "generator/processor_country.hpp"
|
#include "generator/processor_country.hpp"
|
||||||
|
|||||||
@@ -141,6 +141,7 @@ void RawGenerator::GenerateCoasts()
|
|||||||
|
|
||||||
void RawGenerator::GenerateCustom(std::shared_ptr<TranslatorInterface> const & translator)
|
void RawGenerator::GenerateCustom(std::shared_ptr<TranslatorInterface> const & translator)
|
||||||
{
|
{
|
||||||
|
CHECK(translator, ());
|
||||||
m_translators->Append(translator);
|
m_translators->Append(translator);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -148,6 +149,7 @@ void RawGenerator::GenerateCustom(
|
|||||||
std::shared_ptr<TranslatorInterface> const & translator,
|
std::shared_ptr<TranslatorInterface> const & translator,
|
||||||
std::shared_ptr<FinalProcessorIntermediateMwmInterface> const & finalProcessor)
|
std::shared_ptr<FinalProcessorIntermediateMwmInterface> const & finalProcessor)
|
||||||
{
|
{
|
||||||
|
CHECK(translator && finalProcessor, ());
|
||||||
m_translators->Append(translator);
|
m_translators->Append(translator);
|
||||||
m_finalProcessors.emplace(finalProcessor);
|
m_finalProcessors.emplace(finalProcessor);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,17 +1,16 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "generator/sponsored_object_storage.hpp"
|
#include "generator/sponsored_object_storage.hpp"
|
||||||
|
#include "generator/sponsored_scoring.hpp"
|
||||||
|
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
namespace feature
|
namespace feature { class FeatureBuilder; }
|
||||||
{
|
|
||||||
class FeatureBuilder;
|
|
||||||
} // namespace feature
|
|
||||||
|
|
||||||
namespace generator
|
namespace generator
|
||||||
{
|
{
|
||||||
|
|
||||||
template<typename SponsoredObject>
|
template<typename SponsoredObject>
|
||||||
class SponsoredDataset
|
class SponsoredDataset
|
||||||
{
|
{
|
||||||
@@ -19,32 +18,34 @@ public:
|
|||||||
using Object = SponsoredObject;
|
using Object = SponsoredObject;
|
||||||
using ObjectId = typename Object::ObjectId;
|
using ObjectId = typename Object::ObjectId;
|
||||||
|
|
||||||
static double constexpr kDistanceLimitInMeters = 150;
|
static double constexpr kDistanceLimitMeters = 150;
|
||||||
static size_t constexpr kMaxSelectedElements = 3;
|
static size_t constexpr kMaxSelectedElements = 3;
|
||||||
|
|
||||||
explicit SponsoredDataset(std::string const & dataPath);
|
explicit SponsoredDataset(std::string const & dataPath);
|
||||||
|
|
||||||
/// @return true if |fb| satisfies some necessary conditions to match one or serveral
|
/// @return true if |fb| satisfies some necessary conditions to match one or several objects from dataset.
|
||||||
/// objects from dataset.
|
bool IsSponsoredCandidate(feature::FeatureBuilder const & fb) const;
|
||||||
bool NecessaryMatchingConditionHolds(feature::FeatureBuilder const & fb) const;
|
ObjectId FindMatchingObjectId(feature::FeatureBuilder const & fb) const;
|
||||||
ObjectId FindMatchingObjectId(feature::FeatureBuilder const & e) const;
|
|
||||||
|
|
||||||
using FBuilderFnT = std::function<void(feature::FeatureBuilder &)>;
|
using FBuilderFnT = std::function<void(feature::FeatureBuilder &)>;
|
||||||
// Applies changes to a given osm object (for example, remove hotel type)
|
// Applies changes to a given osm object (for example, remove hotel type)
|
||||||
// and passes the result to |fn|.
|
// and passes the result to |fn|.
|
||||||
void PreprocessMatchedOsmObject(ObjectId matchedObjId, feature::FeatureBuilder & fb,
|
void PreprocessMatchedOsmObject(ObjectId matchedObjId, feature::FeatureBuilder & fb, FBuilderFnT const fn) const;
|
||||||
FBuilderFnT const fn) const;
|
|
||||||
// Creates objects and adds them to the map (MWM) via |fn|.
|
// Creates objects and adds them to the map (MWM) via |fn|.
|
||||||
void BuildOsmObjects(FBuilderFnT const & fn) const;
|
void BuildOsmObjects(FBuilderFnT const & fn) const;
|
||||||
|
|
||||||
|
static sponsored::MatchStats CalcScore(Object const & obj, feature::FeatureBuilder const & fb);
|
||||||
|
sponsored::MatchStats CalcScore(ObjectId objId, feature::FeatureBuilder const & fb) const
|
||||||
|
{
|
||||||
|
return CalcScore(m_storage.GetObjectById(objId), fb);
|
||||||
|
}
|
||||||
|
|
||||||
SponsoredObjectStorage<Object> const & GetStorage() const { return m_storage; }
|
SponsoredObjectStorage<Object> const & GetStorage() const { return m_storage; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void BuildObject(Object const & object, FBuilderFnT const & fn) const;
|
void BuildObject(Object const & object, FBuilderFnT const & fn) const;
|
||||||
|
|
||||||
/// @return an id of a matched object or kInvalidObjectId on failure.
|
|
||||||
ObjectId FindMatchingObjectIdImpl(feature::FeatureBuilder const & fb) const;
|
|
||||||
|
|
||||||
SponsoredObjectStorage<Object> m_storage;
|
SponsoredObjectStorage<Object> m_storage;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace generator
|
} // namespace generator
|
||||||
|
|||||||
@@ -1,17 +1,21 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "generator/feature_builder.hpp"
|
||||||
#include "generator/sponsored_dataset.hpp"
|
#include "generator/sponsored_dataset.hpp"
|
||||||
|
|
||||||
|
#include "geometry/mercator.hpp"
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
|
||||||
namespace generator
|
namespace generator
|
||||||
{
|
{
|
||||||
|
|
||||||
// SponsoredDataset --------------------------------------------------------------------------------
|
// SponsoredDataset --------------------------------------------------------------------------------
|
||||||
template <typename SponsoredObject>
|
template <typename SponsoredObject>
|
||||||
SponsoredDataset<SponsoredObject>::SponsoredDataset(std::string const & dataPath)
|
SponsoredDataset<SponsoredObject>::SponsoredDataset(std::string const & dataPath)
|
||||||
: m_storage(kDistanceLimitInMeters, kMaxSelectedElements)
|
: m_storage(kDistanceLimitMeters, kMaxSelectedElements)
|
||||||
{
|
{
|
||||||
m_storage.LoadData(dataPath);
|
m_storage.LoadData(dataPath);
|
||||||
}
|
}
|
||||||
@@ -23,12 +27,46 @@ void SponsoredDataset<SponsoredObject>::BuildOsmObjects(FBuilderFnT const & fn)
|
|||||||
BuildObject(item.second, fn);
|
BuildObject(item.second, fn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename SponsoredObject>
|
||||||
|
sponsored::MatchStats SponsoredDataset<SponsoredObject>::CalcScore(
|
||||||
|
Object const & obj, feature::FeatureBuilder const & fb)
|
||||||
|
{
|
||||||
|
auto const fbCenter = mercator::ToLatLon(fb.GetKeyPoint());
|
||||||
|
auto const distance = ms::DistanceOnEarth(fbCenter, obj.m_latLon);
|
||||||
|
|
||||||
|
/// @todo Input dataset is in English language.
|
||||||
|
auto name = fb.GetName(StringUtf8Multilang::kEnglishCode);
|
||||||
|
if (name.empty())
|
||||||
|
name = fb.GetName(StringUtf8Multilang::kDefaultCode);
|
||||||
|
|
||||||
|
return { distance, kDistanceLimitMeters, obj.m_name, std::string(name) };
|
||||||
|
}
|
||||||
|
|
||||||
template <typename SponsoredObject>
|
template <typename SponsoredObject>
|
||||||
typename SponsoredDataset<SponsoredObject>::ObjectId
|
typename SponsoredDataset<SponsoredObject>::ObjectId
|
||||||
SponsoredDataset<SponsoredObject>::FindMatchingObjectId(feature::FeatureBuilder const & fb) const
|
SponsoredDataset<SponsoredObject>::FindMatchingObjectId(feature::FeatureBuilder const & fb) const
|
||||||
{
|
{
|
||||||
if (NecessaryMatchingConditionHolds(fb))
|
// Find |kMaxSelectedElements| nearest values to a point, sorted by distance?
|
||||||
return FindMatchingObjectIdImpl(fb);
|
auto const indices = m_storage.GetNearestObjects(mercator::ToLatLon(fb.GetKeyPoint()));
|
||||||
return Object::InvalidObjectId();
|
|
||||||
|
// Select best candidate by score.
|
||||||
|
double bestScore = -1;
|
||||||
|
auto res = Object::InvalidObjectId();
|
||||||
|
for (auto const i : indices)
|
||||||
|
{
|
||||||
|
auto const r = CalcScore(i, fb);
|
||||||
|
if (r.IsMatched())
|
||||||
|
{
|
||||||
|
double const score = r.GetMatchingScore();
|
||||||
|
if (score > bestScore)
|
||||||
|
{
|
||||||
|
bestScore = score;
|
||||||
|
res = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace generator
|
} // namespace generator
|
||||||
|
|||||||
@@ -22,22 +22,15 @@ struct SponsoredObjectBase
|
|||||||
|
|
||||||
virtual ~SponsoredObjectBase() = default;
|
virtual ~SponsoredObjectBase() = default;
|
||||||
|
|
||||||
template<typename Fields>
|
|
||||||
static constexpr size_t FieldIndex(Fields field) { return static_cast<size_t>(field); }
|
|
||||||
|
|
||||||
template<typename Fields>
|
|
||||||
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
|
|
||||||
|
|
||||||
bool HasAddresParts() const { return !m_street.empty() || !m_houseNumber.empty(); }
|
bool HasAddresParts() const { return !m_street.empty() || !m_houseNumber.empty(); }
|
||||||
|
|
||||||
ObjectId m_id{InvalidObjectId()};
|
ObjectId m_id{InvalidObjectId()};
|
||||||
ms::LatLon m_latLon = ms::LatLon::Zero();
|
ms::LatLon m_latLon = ms::LatLon::Zero();
|
||||||
std::string m_name;
|
std::string m_name;
|
||||||
|
|
||||||
std::string m_street;
|
std::string m_street;
|
||||||
std::string m_houseNumber;
|
std::string m_houseNumber;
|
||||||
|
|
||||||
std::string m_address;
|
std::string m_address;
|
||||||
std::string m_descUrl;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
NEWTYPE_SIMPLE_OUTPUT(SponsoredObjectBase::ObjectId);
|
NEWTYPE_SIMPLE_OUTPUT(SponsoredObjectBase::ObjectId);
|
||||||
|
|||||||
@@ -107,7 +107,9 @@ public:
|
|||||||
|
|
||||||
for (std::string line; std::getline(src, line);)
|
for (std::string line; std::getline(src, line);)
|
||||||
{
|
{
|
||||||
Object object(line);
|
Object object(std::move(line));
|
||||||
|
line.clear();
|
||||||
|
|
||||||
if (object.m_id != Object::InvalidObjectId() &&
|
if (object.m_id != Object::InvalidObjectId() &&
|
||||||
excludedIds.find(object.m_id) == excludedIds.cend())
|
excludedIds.find(object.m_id) == excludedIds.cend())
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
#include "generator/sponsored_scoring.hpp"
|
#include "generator/sponsored_scoring.hpp"
|
||||||
|
|
||||||
|
#include "search/ranking_utils.hpp"
|
||||||
|
|
||||||
#include "indexer/search_string_utils.hpp"
|
#include "indexer/search_string_utils.hpp"
|
||||||
|
|
||||||
#include "base/math.hpp"
|
#include "base/math.hpp"
|
||||||
@@ -7,18 +9,50 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
|
||||||
|
namespace generator
|
||||||
|
{
|
||||||
|
namespace sponsored
|
||||||
|
{
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
using WeightedBagOfWords = std::vector<std::pair<strings::UniString, double>>;
|
using StringT = strings::UniString;
|
||||||
|
class SkipTokens
|
||||||
|
{
|
||||||
|
std::set<StringT> m_skip;
|
||||||
|
public:
|
||||||
|
SkipTokens()
|
||||||
|
{
|
||||||
|
/// @todo Add other common terms?
|
||||||
|
m_skip.insert(strings::MakeUniString("hotel"));
|
||||||
|
}
|
||||||
|
bool Has(StringT const & s) const
|
||||||
|
{
|
||||||
|
return m_skip.count(s) > 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
std::vector<strings::UniString> StringToWords(std::string const & str)
|
using WeightedBagOfWords = std::vector<std::pair<StringT, double>>;
|
||||||
|
|
||||||
|
std::vector<StringT> StringToWords(std::string const & str)
|
||||||
{
|
{
|
||||||
auto result = search::NormalizeAndTokenizeString(str);
|
auto result = search::NormalizeAndTokenizeString(str);
|
||||||
std::sort(std::begin(result), std::end(result));
|
|
||||||
|
static SkipTokens toSkip;
|
||||||
|
auto it = std::remove_if(result.begin(), result.end(), [](StringT const & s)
|
||||||
|
{
|
||||||
|
return toSkip.Has(s) || search::IsStopWord(s);
|
||||||
|
});
|
||||||
|
|
||||||
|
// In case if name is like "The Hotel".
|
||||||
|
if (std::distance(result.begin(), it) > 0)
|
||||||
|
result.erase(it, result.end());
|
||||||
|
|
||||||
|
std::sort(result.begin(), result.end());
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
WeightedBagOfWords MakeWeightedBagOfWords(std::vector<strings::UniString> const & words)
|
WeightedBagOfWords MakeWeightedBagOfWords(std::vector<StringT> const & words)
|
||||||
{
|
{
|
||||||
// TODO(mgsergio): Calculate tf-idsf score for every word.
|
// TODO(mgsergio): Calculate tf-idsf score for every word.
|
||||||
auto constexpr kTfIdfScorePlaceholder = 1;
|
auto constexpr kTfIdfScorePlaceholder = 1;
|
||||||
@@ -38,7 +72,7 @@ WeightedBagOfWords MakeWeightedBagOfWords(std::vector<strings::UniString> const
|
|||||||
|
|
||||||
double WeightedBagsDotProduct(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
|
double WeightedBagsDotProduct(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
|
||||||
{
|
{
|
||||||
double result{};
|
double result = 0;
|
||||||
|
|
||||||
auto lhsIt = begin(lhs);
|
auto lhsIt = begin(lhs);
|
||||||
auto rhsIt = begin(rhs);
|
auto rhsIt = begin(rhs);
|
||||||
@@ -77,12 +111,7 @@ double WeightedBagOfWordsCos(WeightedBagOfWords const & lhs, WeightedBagOfWords
|
|||||||
|
|
||||||
return product / (lhsLength * rhsLength);
|
return product / (lhsLength * rhsLength);
|
||||||
}
|
}
|
||||||
} // namespace
|
|
||||||
|
|
||||||
namespace generator
|
|
||||||
{
|
|
||||||
namespace impl
|
|
||||||
{
|
|
||||||
double GetLinearNormDistanceScore(double distance, double const maxDistance)
|
double GetLinearNormDistanceScore(double distance, double const maxDistance)
|
||||||
{
|
{
|
||||||
CHECK_NOT_EQUAL(maxDistance, 0.0, ("maxDistance cannot be 0."));
|
CHECK_NOT_EQUAL(maxDistance, 0.0, ("maxDistance cannot be 0."));
|
||||||
@@ -102,5 +131,16 @@ double GetNameSimilarityScore(std::string const & booking_name, std::string cons
|
|||||||
|
|
||||||
return WeightedBagOfWordsCos(aws, bws);
|
return WeightedBagOfWordsCos(aws, bws);
|
||||||
}
|
}
|
||||||
} // namespace impl
|
} // namespace
|
||||||
|
|
||||||
|
/// @param distM      Distance between the two objects, in meters.
/// @param distLimitM Distance limit used to normalize |distM|; must not be 0.
/// @param name       Name of the sponsored object.
/// @param fbName     Name of the feature it is compared with.
MatchStats::MatchStats(double distM, double distLimitM, std::string const & name, std::string const & fbName)
  : m_distance(distM)
  , m_linearNormDistanceScore(GetLinearNormDistanceScore(distM, distLimitM))
  // TODO(mgsergio): Check all translations and use the best one.
  , m_nameSimilarityScore(GetNameSimilarityScore(name, fbName))
{
}
|
||||||
|
|
||||||
|
} // namespace sponsored
|
||||||
} // namespace generator
|
} // namespace generator
|
||||||
|
|||||||
@@ -2,36 +2,40 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
namespace feature
|
|
||||||
{
|
|
||||||
class FeatureBuilder;
|
|
||||||
} // namespace feature
|
|
||||||
|
|
||||||
namespace generator
|
namespace generator
|
||||||
{
|
{
|
||||||
namespace impl
|
struct SponsoredObjectBase;
|
||||||
{
|
|
||||||
double GetLinearNormDistanceScore(double distance, double maxDistance);
|
|
||||||
double GetNameSimilarityScore(std::string const & booking_name, std::string const & osm_name);
|
|
||||||
} // namespace impl
|
|
||||||
|
|
||||||
namespace sponsored_scoring
|
namespace sponsored
|
||||||
{
|
{
|
||||||
/// Represents a match scoring statystics of a sponsored object agains osm object.
|
|
||||||
template <typename SponsoredObject>
|
|
||||||
struct MatchStats
|
|
||||||
{
|
|
||||||
/// Returns some score based on geven fields and classificator tuning.
|
|
||||||
double GetMatchingScore() const;
|
|
||||||
/// Returns true if GetMatchingScore is greater then some theshold.
|
|
||||||
bool IsMatched() const;
|
|
||||||
|
|
||||||
double m_linearNormDistanceScore{};
|
/// Represents a match scoring statistics of a sponsored object against OSM object.
|
||||||
double m_nameSimilarityScore{};
|
class MatchStats
|
||||||
|
{
|
||||||
|
// Calculated with tools/python/booking_hotels_quality.py.
|
||||||
|
static double constexpr kOptimalThreshold = 0.304875;
|
||||||
|
|
||||||
|
public:
|
||||||
|
MatchStats(double distM, double distLimitM, std::string const & name, std::string const & fbName);
|
||||||
|
|
||||||
|
/// @return some score based on given fields and classificator tuning.
|
||||||
|
double GetMatchingScore() const
|
||||||
|
{
|
||||||
|
// TODO(mgsergio): Use tuner to get optimal function.
|
||||||
|
return m_linearNormDistanceScore * m_nameSimilarityScore;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @return true if GetMatchingScore is greater than some threshold.
|
||||||
|
bool IsMatched() const
|
||||||
|
{
|
||||||
|
return GetMatchingScore() > kOptimalThreshold;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
double m_distance;
|
||||||
|
double m_linearNormDistanceScore;
|
||||||
|
double m_nameSimilarityScore;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Matches a given sponsored object against a given OSM object.
|
} // namespace sponsored
|
||||||
template <typename SponsoredObject>
|
|
||||||
MatchStats<SponsoredObject> Match(SponsoredObject const & o, feature::FeatureBuilder const & fb);
|
|
||||||
} // namespace booking_scoring
|
|
||||||
} // namespace generator
|
} // namespace generator
|
||||||
|
|||||||
Reference in New Issue
Block a user