Restore booking flow.

Signed-off-by: Viktor Govako <viktor.govako@gmail.com>
This commit is contained in:
Viktor Govako
2023-06-23 22:55:21 -03:00
committed by Konstantin Pastbin
parent 8f1a0903bb
commit 556a474fda
25 changed files with 1390 additions and 9 deletions

View File

@@ -9,6 +9,10 @@ set(SRC
affiliation.hpp
altitude_generator.cpp
altitude_generator.hpp
# Should precede booking_dataset.cpp because of Unity build + template instantiation order.
booking_scoring.cpp
booking_dataset.cpp
booking_dataset.hpp
borders.cpp
borders.hpp
boundary_postcodes_enricher.cpp
@@ -196,6 +200,12 @@ set(SRC
routing_world_roads_generator.hpp
search_index_builder.cpp
search_index_builder.hpp
sponsored_dataset.hpp
sponsored_dataset_inl.hpp
sponsored_object_base.hpp
sponsored_object_storage.hpp
sponsored_scoring.cpp
sponsored_scoring.hpp
srtm_parser.cpp
srtm_parser.hpp
statistics.cpp
@@ -274,5 +284,6 @@ omim_add_tool_subdirectory(generator_tool)
#omim_add_tool_subdirectory(complex_generator)
omim_add_tool_subdirectory(feature_segments_checker)
omim_add_tool_subdirectory(srtm_coverage_checker)
omim_add_tool_subdirectory(booking_quality_check)
add_subdirectory(world_roads_builder)
add_subdirectory(address_parser)

View File

@@ -0,0 +1,200 @@
#include "generator/booking_dataset.hpp"
#include "generator/feature_builder.hpp"
#include "generator/sponsored_scoring.hpp"
#include "indexer/classificator.hpp"
#include "indexer/ftypes_matcher.hpp"
#include "geometry/mercator.hpp"
#include "base/logging.hpp"
#include "base/string_utils.hpp"
#include "boost/algorithm/string/replace.hpp"
namespace generator
{
using namespace feature;
// BookingHotel ------------------------------------------------------------------------------------
// Builds a hotel record from one tab-separated row |src| of the booking data file.
// The row must contain exactly FieldsCount() fields, otherwise CHECK_EQUAL fails.
BookingHotel::BookingHotel(std::string const & src)
{
std::vector<std::string> rec;
strings::ParseCSVRow(src, '\t', rec);
// Tabs are replaced with a visible "\t" so the offending row stays readable in the message.
CHECK_EQUAL(rec.size(), FieldsCount(), ("Error parsing hotels.tsv line:",
boost::replace_all_copy(src, "\t", "\\t")));
// NOTE(review): every numeric conversion below is wrapped in CLOG(LDEBUG, ...); presumably a
// failed conversion is only logged and the member keeps its default value - confirm.
CLOG(LDEBUG, strings::to_uint(rec[FieldIndex(Fields::Id)], m_id.Get()), ());
// TODO(mgsergio): Use ms::LatLon.
CLOG(LDEBUG, strings::to_double(rec[FieldIndex(Fields::Latitude)], m_latLon.m_lat), ());
CLOG(LDEBUG, strings::to_double(rec[FieldIndex(Fields::Longtitude)], m_latLon.m_lon), ());
// Textual fields are copied verbatim.
m_name = rec[FieldIndex(Fields::Name)];
m_address = rec[FieldIndex(Fields::Address)];
CLOG(LDEBUG, strings::to_uint(rec[FieldIndex(Fields::Stars)], m_stars), ());
CLOG(LDEBUG, strings::to_uint(rec[FieldIndex(Fields::PriceCategory)], m_priceCategory), ());
CLOG(LDEBUG, strings::to_double(rec[FieldIndex(Fields::RatingBooking)], m_ratingBooking), ());
CLOG(LDEBUG, strings::to_double(rec[FieldIndex(Fields::RatingUsers)], m_ratingUser), ());
m_descUrl = rec[FieldIndex(Fields::DescUrl)];
CLOG(LDEBUG, strings::to_uint(rec[FieldIndex(Fields::Type)], m_type), ());
// Kept as a raw string here; it is split into parts later, in BookingDataset::BuildObject.
m_translations = rec[FieldIndex(Fields::Translations)];
}
// BookingDataset ----------------------------------------------------------------------------------
/// A feature is a matching candidate only if it has a default name and a hotel type.
template <>
bool BookingDataset::NecessaryMatchingConditionHolds(FeatureBuilder const & fb) const
{
  bool const hasDefaultName = !fb.GetName(StringUtf8Multilang::kDefaultCode).empty();
  // Short-circuit: the type check runs only for named features.
  return hasDefaultName && ftypes::IsHotelChecker::Instance()(fb.GetTypes());
}
/// Prepares an OSM feature that was matched with a booking hotel and passes it to |fn|.
/// Area features are turned into plain buildings; all other features are passed through as is.
template <>
void BookingDataset::PreprocessMatchedOsmObject(ObjectId, FeatureBuilder & fb, FBuilderFnT const fn) const
{
  if (fb.GetGeomType() != GeomType::Area)
  {
    fn(fb);
    return;
  }

  // Drop hotel-specific metadata.
  auto & metadata = fb.GetMetadata();
  metadata.Drop(Metadata::EType::FMD_STARS);
  metadata.Drop(Metadata::EType::FMD_WEBSITE);
  metadata.Drop(Metadata::EType::FMD_PHONE_NUMBER);

  // Drop the name and every type that belongs to the "tourism" branch.
  auto & featureParams = fb.GetParams();
  featureParams.ClearName();

  auto const tourism = classif().GetTypeByPath({"tourism"});
  auto const isTourismType = [tourism](uint32_t type)
  {
    // Compare by the first level of the type only.
    ftype::TruncValue(type, 1);
    return type == tourism;
  };
  base::EraseIf(featureParams.m_types, isTourismType);

  fn(fb);
}
/// Creates a point feature for the booking |hotel| (center, hotel info, metadata, address,
/// names and types) and passes it to |fn|.
template <>
void BookingDataset::BuildObject(Object const & hotel, FBuilderFnT const & fn) const
{
  // Maps a booking.com accommodation type to the last component of an OSM "tourism" type.
  // Booking types are listed in the closed API docs. Unknown values are treated as a hotel.
  auto const tourismSubtype = [](uint32_t bookingType) -> char const *
  {
    switch (bookingType)
    {
    case 19:
    case 205: return "motel";

    case 21:
    case 206:
    case 212: return "resort";

    case 3:
    case 23:
    case 24:
    case 25:
    case 202:
    case 207:
    case 208:
    case 209:
    case 210:
    case 216:
    case 220:
    case 223: return "guest_house";

    case 14:
    case 204:
    case 213:
    case 218:
    case 219:
    case 226:
    case 222: return "hotel";

    case 211:
    case 224:
    case 228: return "chalet";

    case 13:
    case 225:
    case 203: return "hostel";

    case 215:
    case 221:
    case 227:
    case 2:
    case 201: return "apartment";

    case 214: return "camp_site";

    default: return "hotel";
    }
  };

  FeatureBuilder fb;
  fb.SetCenter(mercator::FromLatLon(hotel.m_latLon.m_lat, hotel.m_latLon.m_lon));

  /// @todo SRC_BOOKING
  fb.SetHotelInfo(Metadata::SRC_KAYAK, hotel.m_id.Get(), hotel.m_ratingUser, hotel.m_priceCategory);

  auto & metadata = fb.GetMetadata();
  metadata.Set(Metadata::FMD_WEBSITE, hotel.m_descUrl);
  metadata.Set(Metadata::FMD_STARS, strings::to_string(hotel.m_stars));

  auto & params = fb.GetParams();

  // Address parts are optional.
  if (!hotel.m_street.empty())
    params.SetStreet(hotel.m_street);
  if (!hotel.m_houseNumber.empty())
    params.AddHouseNumber(hotel.m_houseNumber);

  if (!hotel.m_translations.empty())
  {
    // TODO(mgsergio): Move parsing to the hotel construction stage.
    // Translations are stored as '|'-separated triples; only the first two items are used here.
    std::vector<std::string> parts;
    strings::ParseCSVRow(hotel.m_translations, '|', parts);
    CHECK_EQUAL(parts.size() % 3, 0, ("Invalid translation string:", hotel.m_translations));
    for (size_t i = 0; i < parts.size(); i += 3)
    {
      auto const langCode = StringUtf8Multilang::GetLangIndex(parts[i]);
      params.AddName(StringUtf8Multilang::GetLangByCode(langCode), parts[i + 1]);
      // TODO(mgsergio): e.AddTag("addr:full:" + parts[i], parts[i + 2]);
    }
  }
  // The main hotel name is registered as the English name.
  params.AddName(StringUtf8Multilang::GetLangByCode(StringUtf8Multilang::kEnglishCode), hotel.m_name);

  auto const & clf = classif();
  params.AddType(clf.GetTypeByPath({"sponsored", "booking"}));
  // Matching booking.com hotel types to OpenStreetMap values.
  params.AddType(clf.GetTypeByPath({"tourism", tourismSubtype(hotel.m_type)}));

  fn(fb);
}
/// @todo It looks like quite common FindMatchingObjectId function implementation.
/// Returns the id of the first nearby booking object whose score against |fb| counts as a match,
/// or Object::InvalidObjectId() when |fb| has no default name or nothing matches.
template <>
BookingDataset::ObjectId BookingDataset::FindMatchingObjectIdImpl(FeatureBuilder const & fb) const
{
  if (fb.GetName(StringUtf8Multilang::kDefaultCode).empty())
    return Object::InvalidObjectId();

  // Find |kMaxSelectedElements| nearest values to a point, sorted by distance?
  auto const candidateIds = m_storage.GetNearestObjects(mercator::ToLatLon(fb.GetKeyPoint()));

  /// @todo Select best candidate? Assume we match "Foo Resort SPA hotel" feature. Have candidates:
  /// - "Bar SPA hotel" in 10 meters (first);
  /// - "Foo SPA hotel" in 100 meters (second, but best);
  /// I suspect that first "Bar hotel" will be selected (wrong).
  for (auto const candidateId : candidateIds)
  {
    auto const stats = sponsored_scoring::Match(m_storage.GetObjectById(candidateId), fb);
    if (!stats.IsMatched())
      continue;
    return candidateId;
  }

  return Object::InvalidObjectId();
}
} // namespace generator

View File

@@ -0,0 +1,43 @@
#pragma once
#include "generator/sponsored_dataset.hpp"
#include "generator/sponsored_object_base.hpp"
#include <string>
namespace generator
{
// A single hotel record read from the booking data file (one tab-separated row per hotel).
struct BookingHotel : SponsoredObjectBase
{
// Column indexes of a row. |Counter| is the last enumerator and thus equals the number of columns.
// NOTE: the "Longtitude" spelling is used by the parsing code, so it is kept as is.
enum class Fields
{
Id = 0,
Latitude = 1,
Longtitude = 2,
Name = 3,
Address = 4,
Stars = 5,
PriceCategory = 6,
RatingBooking = 7,
RatingUsers = 8,
DescUrl = 9,
Type = 10,
Translations = 11,
Counter
};
// Parses the row |src|; see the definition for the handling of malformed input.
explicit BookingHotel(std::string const & src);
// Thin wrappers that forward to the SponsoredObjectBase helpers for this Fields enum.
static constexpr size_t FieldIndex(Fields field) { return SponsoredObjectBase::FieldIndex(field); }
static constexpr size_t FieldsCount() { return SponsoredObjectBase::FieldsCount<Fields>(); }
uint32_t m_stars = 0;
uint32_t m_priceCategory = 0;
double m_ratingBooking = 0.0;
double m_ratingUser = 0.0;
// Booking accommodation type id; mapped to a "tourism" type in BookingDataset::BuildObject.
uint32_t m_type = 0;
// Raw '|'-separated translations string; parsed in BookingDataset::BuildObject.
std::string m_translations;
};
// Dataset of booking hotels.
using BookingDataset = SponsoredDataset<BookingHotel>;
} // namespace generator

View File

@@ -0,0 +1,11 @@
# Builds the booking_quality_check tool from a single source file.
project(booking_quality_check)
set(SRC booking_quality_check.cpp)
# Alternative entry point (address matching); swap it in above to build that tool instead.
#set(SRC booking_addr_match.cpp)
omim_add_executable(${PROJECT_NAME} ${SRC})
# The tool links against the generator library and gflags.
target_link_libraries(${PROJECT_NAME}
generator
gflags::gflags
)

View File

@@ -0,0 +1,96 @@
#include "generator/booking_dataset.hpp"
#include "generator/utils.hpp"
#include "search/reverse_geocoder.hpp"
#include "indexer/data_source.hpp"
#include "geometry/mercator.hpp"
#include "platform/platform.hpp"
#include <iostream>
#include <gflags/gflags.h>
// Command-line flags of the address matching tool.
DEFINE_string(booking_data, "", "Path to booking data in .tsv format");
DEFINE_string(user_resource_path, "", "Path to data directory (resources dir)");
DEFINE_string(data_path, "", "Path to mwm files (writable dir)");
// NOTE(review): |locale| and |num_threads| are not read anywhere in the visible part of this
// file - confirm whether they are still needed.
DEFINE_string(locale, "en", "Locale of all the search queries");
DEFINE_int32(num_threads, 1, "Number of search engine threads");
namespace
{
class AddressMatcher
{
public:
AddressMatcher()
{
LoadDataSource(m_dataSource);
m_coder = std::make_unique<search::ReverseGeocoder>(m_dataSource);
}
template <typename SponsoredObject>
void operator()(SponsoredObject & object)
{
search::ReverseGeocoder::Address addr;
m_coder->GetNearbyAddress(mercator::FromLatLon(object.m_latLon), addr);
object.m_street = addr.GetStreetName();
object.m_houseNumber = addr.GetHouseNumber();
}
private:
FrozenDataSource m_dataSource;
std::unique_ptr<search::ReverseGeocoder> m_coder;
};
} // namespace
// Entry point of the address matching tool.
// Loads the booking dataset, looks up a nearby address for every hotel and prints one line per
// hotel for which HasAddresParts() holds, followed by a summary line.
int main(int argc, char * argv[])
{
  // The usage text describes this tool (it used to be a copy of the generator tool's text).
  gflags::SetUsageMessage(
      "Finds a nearby address (street and house number) for every hotel from the booking data"
      " and prints matching statistics.");
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  Platform & platform = GetPlatform();
  if (!FLAGS_user_resource_path.empty())
    platform.SetResourceDir(FLAGS_user_resource_path);
  if (!FLAGS_data_path.empty())
    platform.SetWritableDirForTests(FLAGS_data_path);

  LOG(LINFO, ("writable dir =", platform.WritableDir()));
  LOG(LINFO, ("resources dir =", platform.ResourcesDir()));
  LOG_SHORT(LINFO, ("Booking data:", FLAGS_booking_data));

  generator::BookingDataset bookingDataset(FLAGS_booking_data);
  AddressMatcher addressMatcher;

  size_t matchedNum = 0;
  size_t emptyAddr = 0;
  auto const & storage = bookingDataset.GetStorage();
  // Each entry is copied on purpose: the storage is accessed through a const reference,
  // while the matcher writes the found address into the hotel.
  for (auto [_, hotel] : storage.GetObjects())
  {
    addressMatcher(hotel);

    if (hotel.m_address.empty())
      ++emptyAddr;

    if (hotel.HasAddresParts())
    {
      ++matchedNum;
      // '\n' instead of std::endl: no need to flush the stream after every hotel.
      std::cout << "Hotel: " << hotel.m_address << " AddLoc: " << hotel.m_translations << " --> "
                << hotel.m_street << " " << hotel.m_houseNumber << '\n';
    }
  }

  std::cout << "Num of hotels: " << storage.Size() << " matched: " << matchedNum
            << " Empty addresses: " << emptyAddr << std::endl;
  return 0;
}

View File

@@ -0,0 +1,373 @@
#include "generator/booking_dataset.hpp"
#include "generator/feature_builder.hpp"
//#include "generator/opentable_dataset.hpp"
#include "generator/osm_source.hpp"
#include "generator/processor_booking.hpp"
#include "generator/raw_generator.hpp"
#include "generator/sponsored_scoring.hpp"
#include "generator/translator_collection.hpp"
#include "generator/translator_factory.hpp"
#include "indexer/classificator_loader.hpp"
#include "geometry/distance_on_sphere.hpp"
#include "base/file_name_utils.hpp"
#include "base/exception.hpp"
#include "base/geo_object_id.hpp"
#include "base/stl_helpers.hpp"
#include "base/string_utils.hpp"
#include <fstream>
#include <memory>
#include <numeric>
#include <random>
#include <sstream>
#include <gflags/gflags.h>
#include "boost/range/adaptor/map.hpp"
#include "boost/range/algorithm/copy.hpp"
using namespace std;
// Command-line flags of the quality check tool.
DEFINE_string(osm, "", "Input .o5m file");
DEFINE_string(booking, "", "Path to booking data in .tsv format");
DEFINE_string(opentable, "", "Path to opentable data in .tsv format");
DEFINE_string(factors, "", "Factors output path");
// Read as the input sample by default and used as the output path with -generate.
DEFINE_string(sample, "", "Path to sample file");
DEFINE_uint64(seed, minstd_rand::default_seed, "Seed for random shuffle");
DEFINE_uint64(selection_size, 1000, "Selection size");
DEFINE_bool(generate, false, "Generate unmarked sample");
using namespace generator;
using namespace feature;
namespace
{
// Returns a one-line, tab-separated description of |fb|: OSM id, default name, params,
// coordinates and geometry type. Only Point and Area geometries are expected.
string PrintBuilder(FeatureBuilder const & fb)
{
  auto const center = mercator::ToLatLon(fb.GetKeyPoint());

  ostringstream out;
  out << "Id: " << DebugPrint(fb.GetMostGenericOsmId()) << '\t';
  out << "Name: " << fb.GetName(StringUtf8Multilang::kDefaultCode) << '\t';
  out << "Params: " << DebugPrint(fb.GetParams()) << '\t';
  out << "lat: " << center.m_lat << " lon: " << center.m_lon << '\t';

  switch (fb.GetGeomType())
  {
  case GeomType::Point: out << "GeomType: Point"; break;
  case GeomType::Area: out << "GeomType: Area"; break;
  default: CHECK(false, ());
  }

  return out.str();
}
// Thrown when a line of the sample file can't be parsed.
DECLARE_EXCEPTION(ParseError, RootException);

// Parses an OSM id given as "<type> <number>", where the type is "node", "way" or "relation".
// Throws ParseError for any other input.
base::GeoObjectId ReadDebuggedPrintedOsmId(string const & str)
{
  string type;
  uint64_t id;

  istringstream in(str);
  if (!(in >> type >> id))
    MYTHROW(ParseError, ("Can't make osmId from string", str));

  if (type == "node")
    return base::MakeOsmNode(id);
  else if (type == "way")
    return base::MakeOsmWay(id);
  else if (type == "relation")
    return base::MakeOsmRelation(id);

  MYTHROW(ParseError, ("Can't make osmId from string", str));
}
// Builds the generator settings from the command-line flags.
GenerateInfo GetGenerateInfo()
{
  GenerateInfo result;

  // Input: an .o5m OSM file, nodes are kept in the "map" storage.
  result.m_osmFileName = FLAGS_osm;
  result.SetOsmFileType("o5m");
  result.SetNodeStorageType("map");

  // Sponsored data.
  result.m_bookingDataFilename = FLAGS_booking;
  //result.m_opentableDataFilename = FLAGS_opentable;

  // Intermediate files are placed next to the factors file.
  result.m_intermediateDir = base::GetDirectory(FLAGS_factors);

  // Set other info params here.
  return result;
}
// One row of a sample file: a pair (OSM feature, sponsored object) with a match mark.
template <typename Object>
struct SampleItem
{
// Match mark of the pair; Uninitialized is the default for a pair that has no mark yet.
enum MatchStatus {Uninitialized, Yes, No};
using ObjectId = typename Object::ObjectId;
SampleItem() = default;
SampleItem(base::GeoObjectId const & osmId, ObjectId const sponsoredId,
MatchStatus match = Uninitialized)
: m_osmId(osmId), m_sponsoredId(sponsoredId), m_match(match)
{
}
// Id of the OSM feature.
base::GeoObjectId m_osmId;
// Id of the sponsored object; invalid until it is set.
ObjectId m_sponsoredId = Object::InvalidObjectId();
MatchStatus m_match = Uninitialized;
};
// Converts the textual mark ("Yes", "No" or "Uninitialized") into a MatchStatus.
// Throws ParseError for any other string.
template <typename Object>
typename SampleItem<Object>::MatchStatus ReadMatchStatus(string_view str)
{
  using Item = SampleItem<Object>;

  if (str == "Uninitialized")
    return Item::Uninitialized;
  else if (str == "No")
    return Item::No;
  else if (str == "Yes")
    return Item::Yes;

  MYTHROW(ParseError, ("Can't make SampleItem::MatchStatus from string:", str));
}
// Parses one sample line of the form "<osm id>\t<sponsored id>\t<match status>".
// Throws ParseError if the line is malformed in any way, including a wrong number of fields,
// so that the caller can report the error in the same way for every kind of bad line.
template <typename Object>
SampleItem<Object> ReadSampleItem(string const & str)
{
  SampleItem<Object> item;

  auto const parts = strings::Tokenize(str, "\t");
  // A wrong field count used to abort via CHECK_EQUAL, unlike every other format error.
  if (parts.size() != 3)
  {
    MYTHROW(ParseError, ("Can't make SampleItem from string:", str,
                         "due to wrong number of fields."));
  }

  item.m_osmId = ReadDebuggedPrintedOsmId(string(parts[0]));
  if (!strings::to_uint(parts[1], item.m_sponsoredId.Get()))
    MYTHROW(ParseError, ("Can't make uint32 from string:", parts[1]));
  item.m_match = ReadMatchStatus<Object>(parts[2]);

  return item;
}
// Reads sample items from |ist|, one per line.
// On a malformed line logs its 1-based number and terminates the process with code 1.
template <typename Object>
vector<SampleItem<Object>> ReadSample(istream & ist)
{
  vector<SampleItem<Object>> items;

  // Number of the line being parsed; used only for the error message.
  size_t lineNumber = 1;
  try
  {
    string line;
    while (getline(ist, line))
    {
      items.emplace_back(ReadSampleItem<Object>(line));
      ++lineNumber;
    }
  }
  catch (ParseError const & e)
  {
    LOG_SHORT(LERROR, ("Wrong format: line", lineNumber, e.Msg()));
    exit(1);
  }

  return items;
}
// Opens the file |name| and reads all sample items from it.
// Fails with CHECK if the file can't be opened.
template <typename Object>
vector<SampleItem<Object>> ReadSampleFromFile(string const & name)
{
  ifstream ist;
  ist.open(name);
  CHECK(ist.is_open(), ("Can't open file:", name, strerror(errno)));

  return ReadSample<Object>(ist);
}
template <typename Dataset, typename Object = typename Dataset::Object>
void GenerateFactors(Dataset const & dataset,
map<base::GeoObjectId, FeatureBuilder> const & features,
vector<SampleItem<Object>> const & sampleItems, ostream & ost)
{
for (auto const & item : sampleItems)
{
auto const & object = dataset.GetStorage().GetObjectById(item.m_sponsoredId);
auto const & feature = features.at(item.m_osmId);
auto const score = generator::sponsored_scoring::Match(object, feature);
auto const center = mercator::ToLatLon(feature.GetKeyPoint());
double const distanceMeters = ms::DistanceOnEarth(center, object.m_latLon);
auto const matched = score.IsMatched();
ost << "# ------------------------------------------" << fixed << setprecision(6)
<< endl;
ost << (matched ? 'y' : 'n') << " \t" << DebugPrint(feature.GetMostGenericOsmId())
<< "\t " << object.m_id
<< "\tdistance: " << distanceMeters
<< "\tdistance score: " << score.m_linearNormDistanceScore
<< "\tname score: " << score.m_nameSimilarityScore
<< "\tresult score: " << score.GetMatchingScore()
<< endl;
ost << "# " << PrintBuilder(feature) << endl;
ost << "# " << object << endl;
ost << "# URL: https://www.openstreetmap.org/?mlat="
<< object.m_latLon.m_lat << "&mlon=" << object.m_latLon.m_lon << "#map=18/"
<< object.m_latLon.m_lat << "/" << object.m_latLon.m_lon << endl;
}
}
// Kind of sponsored dataset to check; selected in main() from the passed flags.
enum class DatasetType
{
Booking,
// NOTE(review): the Opentable dataset code is commented out in this file.
Opentable
};
// Writes an unmarked sample to |ost|.
// Shuffles the ids of |features| with FLAGS_seed, keeps at most FLAGS_selection_size of them and,
// for every kept feature, prints the matching factors for each of its nearest sponsored objects.
template <typename Dataset, typename Object = typename Dataset::Object>
void GenerateSample(Dataset const & dataset,
                    map<base::GeoObjectId, FeatureBuilder> const & features, ostream & ost)
{
  LOG_SHORT(LINFO, ("Num of elements:", features.size()));

  vector<base::GeoObjectId> elementIndexes(features.size());
  boost::copy(features | boost::adaptors::map_keys, begin(elementIndexes));

  // TODO(mgsergio): Try RandomSample (from search:: at the moment of writing).
  shuffle(elementIndexes.begin(), elementIndexes.end(), minstd_rand(static_cast<uint32_t>(FLAGS_seed)));
  if (FLAGS_selection_size < elementIndexes.size())
    elementIndexes.resize(FLAGS_selection_size);

  auto const & storage = dataset.GetStorage();
  for (auto osmId : elementIndexes)
  {
    auto const & fb = features.at(osmId);
    // The feature center doesn't depend on the candidate, so it is computed once per feature.
    auto const center = mercator::ToLatLon(fb.GetKeyPoint());
    auto const sponsoredIndexes = storage.GetNearestObjects(center);

    for (auto const sponsoredId : sponsoredIndexes)
    {
      auto const & object = storage.GetObjectById(sponsoredId);
      auto const score = sponsored_scoring::Match(object, fb);
      double const distanceMeters = ms::DistanceOnEarth(center, object.m_latLon);
      auto const matched = score.IsMatched();

      ost << "# ------------------------------------------" << fixed << setprecision(6)
          << endl;
      ost << (matched ? 'y' : 'n') << " \t" << DebugPrint(osmId) << "\t " << sponsoredId
          << "\tdistance: " << distanceMeters
          << "\tdistance score: " << score.m_linearNormDistanceScore
          << "\tname score: " << score.m_nameSimilarityScore
          << "\tresult score: " << score.GetMatchingScore()
          << endl;
      ost << "# " << PrintBuilder(fb) << endl;
      ost << "# " << object << endl;
      ost << "# URL: https://www.openstreetmap.org/?mlat="
          << object.m_latLon.m_lat << "&mlon=" << object.m_latLon.m_lon
          << "#map=18/" << object.m_latLon.m_lat << "/" << object.m_latLon.m_lon << endl;
    }

    // Separate groups of candidates that belong to different features.
    if (!sponsoredIndexes.empty())
      ost << endl << endl;
  }
}
// Returns the path to the data file of the given dataset type.
// Only the specializations below are defined.
template <typename Dataset>
string GetDatasetFilePath(GenerateInfo const & info);
// Booking data comes from |m_bookingDataFilename|.
template <>
string GetDatasetFilePath<BookingDataset>(GenerateInfo const & info)
{
return info.m_bookingDataFilename;
}
//template <>
//string GetDatasetFilePath<OpentableDataset>(GenerateInfo const & info)
//{
// return info.m_opentableDataFilename;
//}
// Loads the dataset, collects the OSM features with a ProcessorBooking and then either
// generates an unmarked sample (-generate) or calculates factors for the given sample.
template <typename Dataset, typename Object = typename Dataset::Object>
void RunImpl(GenerateInfo & info)
{
  auto const & dataSetFilePath = GetDatasetFilePath<Dataset>(info);
  Dataset dataset(dataSetFilePath);
  LOG_SHORT(LINFO, (dataset.GetStorage().Size(), "objects are loaded from a file:", dataSetFilePath));

  map<base::GeoObjectId, FeatureBuilder> features;
  LOG_SHORT(LINFO, ("OSM data:", FLAGS_osm));

  // The processor fills |features| while the generator runs.
  generator::cache::IntermediateDataObjectsCache objectsCache;
  generator::cache::IntermediateData cacheLoader(objectsCache, info);
  auto translators = make_shared<TranslatorCollection>();
  auto processor = make_shared<ProcessorBooking<Dataset>>(dataset, features);
  translators->Append(CreateTranslator(TranslatorType::Country, processor, cacheLoader.GetCache(), info));
  RawGenerator generator(info);
  generator.GenerateCustom(translators);

  if (!FLAGS_generate)
  {
    // Factors mode: read the marked sample and write the factors file.
    auto const sample = ReadSampleFromFile<Object>(FLAGS_sample);
    LOG_SHORT(LINFO, ("Sample size is", sample.size()));

    ofstream ost(FLAGS_factors);
    CHECK(ost.is_open(), ("Can't open file", FLAGS_factors, strerror(errno)));
    GenerateFactors<Dataset>(dataset, features, sample, ost);
    return;
  }

  // Sample generation mode: write to the sample file if it is set, to stdout otherwise.
  unique_ptr<ofstream> ofst;
  ostream * ost = &cout;
  if (!FLAGS_sample.empty())
  {
    ofst = std::make_unique<ofstream>(FLAGS_sample);
    CHECK(ofst->is_open(), ("Can't open file", FLAGS_sample, strerror(errno)));
    ost = ofst.get();
  }
  GenerateSample(dataset, features, *ost);
}
// Runs the check for the dataset of the given type.
// Only the booking dataset is supported; any other type fails explicitly instead of
// silently doing nothing.
void Run(DatasetType const datasetType, GenerateInfo & info)
{
  switch (datasetType)
  {
  case DatasetType::Booking: RunImpl<BookingDataset>(info); break;
  // Replace with RunImpl<OpentableDataset>(info) once the opentable dataset is restored.
  case DatasetType::Opentable: CHECK(false, ("Opentable dataset is not supported.")); break;
  }
}
} // namespace
// Entry point of the quality check tool: validates the flags, loads the classificator,
// generates the intermediate data and runs the check for the selected dataset.
int main(int argc, char * argv[])
{
  gflags::SetUsageMessage("Calculates factors for given samples.");

  // Show the help when the tool is started without arguments.
  if (argc == 1)
  {
    gflags::ShowUsageWithFlags(argv[0]);
    exit(0);
  }

  gflags::ParseCommandLineFlags(&argc, &argv, true);

  // NOTE(review): RunImpl writes the generated sample to stdout when the sample path is empty,
  // but this check makes the path mandatory - confirm which behavior is intended.
  CHECK(!FLAGS_sample.empty(), ("Please specify sample path."));
  CHECK(!FLAGS_osm.empty(), ("Please specify osm path."));
  CHECK(!FLAGS_booking.empty() || !FLAGS_opentable.empty(),
        ("Please specify either booking or opentable path."));
  // The adjacent literals are concatenated, so the second one has to start with a space.
  CHECK(!FLAGS_factors.empty() || FLAGS_generate, ("Please either specify factors path"
                                                   " or use -generate."));

  // Booking has priority when both datasets are passed.
  auto const datasetType = FLAGS_booking.empty() ? DatasetType::Opentable : DatasetType::Booking;

  classificator::Load();

  auto info = GetGenerateInfo();
  GenerateIntermediateData(info);

  Run(datasetType, info);

  return 0;
}

View File

@@ -0,0 +1,51 @@
#include "generator/sponsored_scoring.hpp"
#include "generator/booking_dataset.hpp"
#include "generator/feature_builder.hpp"
#include "geometry/mercator.hpp"
namespace
{
// Matching score threshold: a pair is a match if its score is strictly greater than this value.
// Calculated with tools/python/booking_hotels_quality.py.
double constexpr kOptimalThreshold = 0.304875;
} // namespace
namespace generator
{
namespace sponsored_scoring
{
/// The resulting score is the product of the distance score and the name similarity score.
template <>
double MatchStats<BookingHotel>::GetMatchingScore() const
{
  // TODO(mgsergio): Use tuner to get optimal function.
  double const distanceFactor = m_linearNormDistanceScore;
  double const nameFactor = m_nameSimilarityScore;
  return distanceFactor * nameFactor;
}
/// A pair is matched when its score is strictly above the precomputed threshold.
template <>
bool MatchStats<BookingHotel>::IsMatched() const
{
  double const score = GetMatchingScore();
  return score > kOptimalThreshold;
}
/// @todo It looks like quite common Match function implementation,
/// because GetLatLon and GetName() needed.
/// Calculates the matching factors of the booking hotel |h| and the feature |fb|:
/// a score based on the distance between them and a score based on the name similarity.
template <>
MatchStats<BookingHotel> Match(BookingHotel const & h, feature::FeatureBuilder const & fb)
{
  auto const featureCenter = mercator::ToLatLon(fb.GetKeyPoint());
  auto const metersApart = ms::DistanceOnEarth(featureCenter, h.m_latLon);

  MatchStats<BookingHotel> stats;
  stats.m_linearNormDistanceScore =
      impl::GetLinearNormDistanceScore(metersApart, BookingDataset::kDistanceLimitInMeters);

  // TODO(mgsergio): Check all translations and use the best one.
  // Only the default name of the feature is compared with the hotel name.
  stats.m_nameSimilarityScore = impl::GetNameSimilarityScore(
      h.m_name, std::string(fb.GetName(StringUtf8Multilang::kDefaultCode)));

  return stats;
}
} // namespace sponsored_scoring
} // namespace generator

View File

@@ -2,6 +2,7 @@
#include "routing/routing_helpers.hpp"
#include "indexer/custom_keyvalue.hpp"
#include "indexer/feature_algo.hpp"
#include "indexer/feature_visibility.hpp"
#include "indexer/ftypes_matcher.hpp"
@@ -593,6 +594,29 @@ size_t FeatureBuilder::GetPointsCount() const
return counter;
}
// Stores hotel info from the source |src| in the feature metadata as custom key-values.
// |id| is always stored. |rating| is expected in [0, 10]: it is scaled to [0, 100] and stored
// only if the result is positive; values outside of the range are treated as "no rating".
// |priceCategory| is stored only if it is positive.
void FeatureBuilder::SetHotelInfo(Metadata::ESource src, uint64_t id, double rating, uint8_t priceCategory)
{
  // Normalize rating [0, 100]
  bool const outOfRange = rating < 0 || rating > 10;
  rating = outOfRange ? 0 : rating * 10;

  auto & meta = GetMetadata();
  // Adds |val| for |src| to the key-value list kept in the metadata field |type|.
  auto const append = [src, &meta](Metadata::EType type, auto val)
  {
    indexer::CustomKeyValue keyValues(meta.Get(type));
    keyValues.Add(src, val);
    meta.Set(type, keyValues.ToString());
  };

  append(Metadata::FMD_CUSTOM_IDS, id);

  if (rating > 0)
  {
    auto const rounded = static_cast<uint8_t>(std::round(rating));
    append(Metadata::FMD_RATINGS, rounded);
  }

  if (priceCategory > 0)
    append(Metadata::FMD_PRICE_RATES, priceCategory);
}
bool FeatureBuilder::IsDrawableInRange(int lowScale, int highScale) const
{
auto const types = GetTypesHolder();

View File

@@ -159,6 +159,8 @@ public:
Metadata const & GetMetadata() const { return m_params.GetMetadata(); }
Metadata & GetMetadata() { return m_params.GetMetadata(); }
// Stores the hotel |id|, |rating| and |priceCategory| that come from the source |src|
// in the feature metadata. |rating| is expected in the range [0, 10].
void SetHotelInfo(Metadata::ESource src, uint64_t id, double rating, uint8_t priceCategory);
// To work with types and names based on drawing.
// Check classificator types for their compatibility with feature geometry type.
// Need to call when using any classificator types manipulating.

View File

@@ -3,6 +3,7 @@
#include "generator/addresses_collector.hpp"
#include "generator/address_enricher.hpp"
#include "generator/affiliation.hpp"
#include "generator/booking_dataset.hpp"
#include "generator/coastlines_generator.hpp"
#include "generator/feature_builder.hpp"
#include "generator/final_processor_utils.hpp"
@@ -12,6 +13,8 @@
#include "generator/osm2type.hpp"
#include "generator/region_meta.hpp"
#include "generator/sponsored_dataset_inl.hpp"
#include "routing/speed_camera_prohibition.hpp"
#include "indexer/classificator.hpp"
@@ -50,6 +53,9 @@ void CountryFinalProcessor::Process()
if (!m_coastlineGeomFilename.empty())
ProcessCoastline();
if (!m_hotelsFilename.empty())
ProcessBooking();
// 1. Process roundabouts and addr:interpolation first.
if (!m_miniRoundaboutsFilename.empty() || !m_addrInterpolFilename.empty())
ProcessRoundabouts();
@@ -89,6 +95,60 @@ void CountryFinalProcessor::Order()
}
*/
// Matches OSM features of every country with booking hotels.
// Matched features are preprocessed by the dataset before they are written back, all the other
// features are written as is. A status line "<booking id or empty>,<osm id>,<lat>,<lon>,<mwm>"
// is logged for every hotel feature. Finally, the features built from the booking objects are
// appended to the temporary mwm files.
void CountryFinalProcessor::ProcessBooking()
{
  BookingDataset dataset(m_hotelsFilename);

  // Throw on open and write failures instead of silently losing the log.
  std::ofstream matchingLogStream;
  matchingLogStream.exceptions(std::fstream::failbit | std::fstream::badbit);
  matchingLogStream.open(m_hotelsStatusFilename);

  // Guards |matchingLogStream|, which is shared by the per-mwm callbacks.
  std::mutex m;
  ForEachMwmTmp(m_temporaryMwmPath, [&](auto const & name, auto const & path)
  {
    if (!IsCountry(name))
      return;

    // The log of one mwm is accumulated locally and flushed once under the lock.
    std::stringstream sstream;
    FeatureBuilderWriter<serialization_policy::MaxAccuracy> writer(path, true /* mangleName */);
    ForEachFeatureRawFormat<serialization_policy::MaxAccuracy>(path, [&](FeatureBuilder && fb, uint64_t)
    {
      // The hotel check has to be done before the preprocessing: PreprocessMatchedOsmObject
      // removes "tourism" types from matched area features, so a check made afterwards
      // would skip matched area hotels in the status log.
      bool const isHotel = ftypes::IsHotelChecker::Instance()(fb.GetTypes());

      auto const id = dataset.FindMatchingObjectId(fb);
      if (id == BookingHotel::InvalidObjectId())
      {
        writer.Write(fb);
      }
      else
      {
        dataset.PreprocessMatchedOsmObject(id, fb, [&](FeatureBuilder & newFeature)
        {
          if (newFeature.PreSerialize())
            writer.Write(newFeature);
        });
      }

      if (isHotel)
      {
        // The first column stays empty for hotels without a matched booking object.
        if (id != BookingHotel::InvalidObjectId())
          sstream << id;

        auto const latLon = mercator::ToLatLon(fb.GetKeyPoint());
        sstream << ',' << fb.GetMostGenericOsmId().GetEncodedId() << ','
                << strings::to_string_dac(latLon.m_lat, 7) << ','
                << strings::to_string_dac(latLon.m_lon, 7) << ',' << name << '\n';
      }
    });

    std::lock_guard guard(m);
    matchingLogStream << sstream.str();
  }, m_threadsCount);

  std::vector<FeatureBuilder> fbs;
  dataset.BuildOsmObjects([&](auto && fb) { fbs.emplace_back(std::move(fb)); });
  AppendToMwmTmp(fbs, *m_affiliations, m_temporaryMwmPath, m_threadsCount);
}
void CountryFinalProcessor::ProcessRoundabouts()
{
auto const roundabouts = ReadMiniRoundabouts(m_miniRoundaboutsFilename);

View File

@@ -41,6 +41,12 @@ public:
m_addressPath = dir;
}
// Sets the path to the hotels data (|hotelsFile|) and the path where the matching status
// log is written (|statusFile|). Hotels are processed only if |hotelsFile| is not empty.
void SetHotels(std::string const & hotelsFile, std::string const & statusFile)
{
  m_hotelsStatusFilename = statusFile;
  m_hotelsFilename = hotelsFile;
}
void SetCityBoundariesFiles(std::string const & collectorFile)
{
m_boundariesCollectorFile = collectorFile;
@@ -54,6 +60,7 @@ public:
private:
//void Order();
void ProcessCoastline();
void ProcessBooking();
void ProcessRoundabouts();
void AddFakeNodes();
void AddIsolines();
@@ -63,12 +70,13 @@ private:
bool IsCountry(std::string const & filename);
std::string m_borderPath;
std::string m_temporaryMwmPath;
std::string m_intermediateDir;
std::string m_isolinesPath, m_addressPath;
std::string m_boundariesCollectorFile;
std::string m_coastlineGeomFilename;
std::string m_hotelsFilename;
std::string m_hotelsStatusFilename;
std::string m_worldCoastsFilename;
std::string m_fakeNodesFilename;
std::string m_miniRoundaboutsFilename;

View File

@@ -1,7 +1,5 @@
#pragma once
#include "generator/cities_boundaries_builder.hpp"
#include "base/file_name_utils.hpp"
#include "base/logging.hpp"
@@ -49,6 +47,7 @@ struct GenerateInfo
OsmSourceType m_osmFileType = OsmSourceType::XML;
std::string m_osmFileName;
std::string m_bookingDataFilename;
std::string m_brandsFilename;
std::string m_brandsTranslationsFilename;

View File

@@ -7,6 +7,7 @@
#include "generator/geometry_holder.hpp"
#include "indexer/data_header.hpp"
#include "indexer/custom_keyvalue.hpp"
#include "indexer/feature_visibility.hpp"
#include "indexer/ftypes_matcher.hpp"
@@ -397,4 +398,21 @@ UNIT_CLASS_TEST(TestWithClassificator, FBuilder_RemoveInconsistentTypes)
TEST(!params.IsTypeExist(classif().GetTypeByPath({"hwtag", "nobicycle"})), ());
}
// Checks that SetHotelInfo stores the id, the rating scaled to [0, 100] and the price category
// as custom key-values of the given source.
UNIT_CLASS_TEST(TestWithClassificator, FBuilder_Hotel)
{
  auto const src = Metadata::SRC_KAYAK;

  FeatureBuilder fb;
  auto const & meta = fb.GetMetadata();

  // Compares the value stored for |src| in the metadata field |type| with |val|.
  auto const isEqual = [&meta, src](Metadata::EType type, uint64_t val)
  {
    indexer::CustomKeyValue const stored(meta.Get(type));
    return stored.Get(src) == val;
  };

  fb.SetHotelInfo(src, 777, 6.3, 4);

  TEST(isEqual(Metadata::FMD_CUSTOM_IDS, 777), ());
  TEST(isEqual(Metadata::FMD_RATINGS, 63), ());
  TEST(isEqual(Metadata::FMD_PRICE_RATES, 4), ());
}
} // namespace feature_builder_test

View File

@@ -1,11 +1,12 @@
#include "test_generator.hpp"
#include "generator/borders.hpp"
#include "generator/camera_info_collector.hpp"
#include "generator/feature_sorter.hpp"
#include "generator/osm_source.hpp"
#include "generator/raw_generator.hpp"
#include "generator/camera_info_collector.hpp"
#include "generator/cities_boundaries_builder.hpp"
#include "generator/maxspeeds_builder.hpp"
#include "generator/restriction_generator.hpp"
#include "generator/road_access_generator.hpp"

View File

@@ -139,6 +139,7 @@ DEFINE_bool(
DEFINE_bool(generate_maxspeed, false, "Generate section with maxspeed of road features.");
// Sponsored-related.
DEFINE_string(booking_data, "", "Path to booking data in tsv format.");
DEFINE_string(complex_hierarchy_data, "", "Path to complex hierarchy in csv format.");
DEFINE_string(wikipedia_pages, "", "Input dir with wikipedia pages.");
@@ -240,6 +241,7 @@ MAIN_WITH_ERROR_HANDLING([](int argc, char ** argv)
genInfo.m_osmFileName = FLAGS_osm_file_name;
genInfo.m_failOnCoasts = FLAGS_fail_on_coasts;
genInfo.m_preloadCache = FLAGS_preload_cache;
genInfo.m_bookingDataFilename = FLAGS_booking_data;
genInfo.m_popularPlacesFilename = FLAGS_popular_places_data;
genInfo.m_brandsFilename = FLAGS_brands_data;
genInfo.m_brandsTranslationsFilename = FLAGS_brands_translations_data;

View File

@@ -189,6 +189,8 @@ RawGenerator::FinalProcessorPtr RawGenerator::CreateCountryFinalProcessor(
auto finalProcessor = std::make_shared<CountryFinalProcessor>(affiliations, m_genInfo.m_tmpDir, m_threadsCount);
finalProcessor->SetIsolinesDir(m_genInfo.m_isolinesDir);
finalProcessor->SetAddressesDir(m_genInfo.m_addressesDir);
finalProcessor->SetHotels(m_genInfo.m_bookingDataFilename, m_genInfo.GetIntermediateFileName("hotels_status.csv"));
finalProcessor->SetMiniRoundabouts(m_genInfo.GetIntermediateFileName(MINI_ROUNDABOUTS_FILENAME));
finalProcessor->SetAddrInterpolation(m_genInfo.GetIntermediateFileName(ADDR_INTERPOL_FILENAME));
if (addAds)

View File

@@ -0,0 +1,50 @@
#pragma once
#include "generator/sponsored_object_storage.hpp"
#include <functional>
#include <string>
namespace feature
{
class FeatureBuilder;
} // namespace feature
namespace generator
{
// A set of sponsored objects of type |SponsoredObject| loaded from a data file.
// Used to match the objects with OSM features and to build features from them.
template<typename SponsoredObject>
class SponsoredDataset
{
public:
using Object = SponsoredObject;
using ObjectId = typename Object::ObjectId;
// Both limits are passed to the object storage on construction.
static double constexpr kDistanceLimitInMeters = 150;
static size_t constexpr kMaxSelectedElements = 3;
// Loads the objects from the file |dataPath|.
explicit SponsoredDataset(std::string const & dataPath);
/// @return true if |fb| satisfies some necessary conditions to match one or several
/// objects from dataset.
bool NecessaryMatchingConditionHolds(feature::FeatureBuilder const & fb) const;
/// @return an id of an object matched with |e| or Object::InvalidObjectId() if
/// the necessary condition doesn't hold or there is no match.
ObjectId FindMatchingObjectId(feature::FeatureBuilder const & e) const;
using FBuilderFnT = std::function<void(feature::FeatureBuilder &)>;
// Applies changes to a given osm object (for example, remove hotel type)
// and passes the result to |fn|.
void PreprocessMatchedOsmObject(ObjectId matchedObjId, feature::FeatureBuilder & fb,
FBuilderFnT const fn) const;
// Creates objects and adds them to the map (MWM) via |fn|.
void BuildOsmObjects(FBuilderFnT const & fn) const;
SponsoredObjectStorage<Object> const & GetStorage() const { return m_storage; }
private:
// Builds a feature from |object| and passes it to |fn|.
void BuildObject(Object const & object, FBuilderFnT const & fn) const;
/// @return an id of a matched object or Object::InvalidObjectId() on failure.
ObjectId FindMatchingObjectIdImpl(feature::FeatureBuilder const & fb) const;
SponsoredObjectStorage<Object> m_storage;
};
} // namespace generator

View File

@@ -0,0 +1,34 @@
#pragma once
#include "generator/sponsored_dataset.hpp"
#include <memory>
#include <string>
namespace generator
{
// SponsoredDataset --------------------------------------------------------------------------------
template <typename SponsoredObject>
SponsoredDataset<SponsoredObject>::SponsoredDataset(std::string const & dataPath)
: m_storage(kDistanceLimitInMeters, kMaxSelectedElements)
{
m_storage.LoadData(dataPath);
}
/// Calls BuildObject() with |fn| for every object kept in the storage.
template <typename SponsoredObject>
void SponsoredDataset<SponsoredObject>::BuildOsmObjects(FBuilderFnT const & fn) const
{
  auto const & objects = m_storage.GetObjects();
  for (auto const & idAndObject : objects)
  {
    Object const & object = idAndObject.second;
    BuildObject(object, fn);
  }
}
/// Returns the id of the object matched to |fb|, or Object::InvalidObjectId() when |fb|
/// does not satisfy the necessary matching conditions.
template <typename SponsoredObject>
typename SponsoredDataset<SponsoredObject>::ObjectId
SponsoredDataset<SponsoredObject>::FindMatchingObjectId(feature::FeatureBuilder const & fb) const
{
  // Cheap precondition first: skip the actual matching for unsuitable features.
  if (!NecessaryMatchingConditionHolds(fb))
    return Object::InvalidObjectId();

  return FindMatchingObjectIdImpl(fb);
}
} // namespace generator

View File

@@ -0,0 +1,52 @@
#pragma once
#include "geometry/latlon.hpp"
#include "base/newtype.hpp"
#include <iomanip>
#include <limits>
#include <ostream>
#include <string>
namespace generator
{
// Fields shared by sponsored objects: id, coordinates, name and address parts.
struct SponsoredObjectBase
{
// Object identifier type declared via the NEWTYPE macro from base/newtype.hpp
// (a distinct type over uint32_t).
NEWTYPE(uint32_t, ObjectId);
// Sentinel id: the maximum value of the underlying representation type.
static constexpr ObjectId InvalidObjectId()
{
return ObjectId(std::numeric_limits<typename ObjectId::RepType>::max());
}
virtual ~SponsoredObjectBase() = default;
// Converts a |Fields| enumerator to a zero-based index.
template<typename Fields>
static constexpr size_t FieldIndex(Fields field) { return static_cast<size_t>(field); }
// Number of fields; |Fields| must have a Counter enumerator holding that number.
template<typename Fields>
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }
// True if the street or the house number is not empty.
bool HasAddresParts() const { return !m_street.empty() || !m_houseNumber.empty(); }
// Stays InvalidObjectId() until an id is assigned.
ObjectId m_id{InvalidObjectId()};
ms::LatLon m_latLon = ms::LatLon::Zero();
std::string m_name;
std::string m_street;
std::string m_houseNumber;
std::string m_address;
std::string m_descUrl;
};
// NOTE(review): presumably defines stream output for ObjectId (it is streamed by
// operator<< for SponsoredObjectBase) - confirm in base/newtype.hpp.
NEWTYPE_SIMPLE_OUTPUT(SponsoredObjectBase::ObjectId);
/// Writes a one-line description of |h| (id, name, address, coordinates) to |s|.
/// Note: std::fixed and a precision of 7 are set on |s| and are not restored afterwards.
inline std::ostream & operator<<(std::ostream & s, SponsoredObjectBase const & h)
{
  s << std::fixed << std::setprecision(7);

  s << "Id: " << h.m_id;
  s << "\t Name: " << h.m_name;
  s << "\t Address: " << h.m_address;
  s << "\t lat: " << h.m_latLon.m_lat << " lon: " << h.m_latLon.m_lon;

  return s;
}
} // namespace generator

View File

@@ -0,0 +1,176 @@
#pragma once
#include "platform/platform.hpp"

#include "geometry/distance_on_sphere.hpp"
#include "geometry/latlon.hpp"

#include "base/logging.hpp"
#include "base/string_utils.hpp"

#include <algorithm>
#include <cerrno>
#include <cstring>
#include <fstream>
#include <functional>
#include <map>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

#include "std/boost_geometry.hpp"
#include <boost/geometry/index/rtree.hpp>
namespace generator
{
/// Storage of sponsored objects: a map from object id to object plus an R-tree built over
/// object coordinates for nearest-neighbour queries.
///
/// |Object| is expected to provide:
/// - ObjectId type with a nested Hash and a Get() accessor;
/// - static InvalidObjectId();
/// - public m_id and m_latLon members;
/// - a constructor taking one line of the data source.
template <typename Object>
class SponsoredObjectStorage
{
public:
  using ObjectId = typename Object::ObjectId;
  using ObjectsContainer = std::map<ObjectId, Object>;
  using ExcludedIdsContainer = std::unordered_set<ObjectId, typename ObjectId::Hash>;

  /// @param distanceLimitMeters  Maximum distance for GetNearestObjects(); 0 disables the limit.
  /// @param maxSelectedElements  Maximum number of candidates requested from the R-tree.
  SponsoredObjectStorage(double distanceLimitMeters, size_t maxSelectedElements)
    : m_distanceLimitMeters(distanceLimitMeters)
    , m_maxSelectedElements(maxSelectedElements)
  {
  }

  double GetDistanceLimitInMeters() const
  {
    return m_distanceLimitMeters;
  }

  size_t GetMaxSelectedElements() const
  {
    return m_maxSelectedElements;
  }

  ObjectsContainer const & GetObjects() const
  {
    return m_objects;
  }

  size_t Size() const
  {
    return m_objects.size();
  }

  /// Loads objects from the file at |dataPath| without excluding any ids.
  /// Does nothing if |dataPath| is empty. If the file cannot be opened, an error is logged
  /// and the current content of the storage is left untouched.
  void LoadData(std::string const & dataPath)
  {
    if (dataPath.empty())
      return;

    std::ifstream dataSource(dataPath);
    if (!dataSource)
    {
      LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno)));
      return;
    }

    LoadData(dataSource, LoadExcludedIds({}));  // empty exclude path
  }

  /// Reads object ids to exclude, one id per line, from the file at |excludedIdsPath|.
  /// @return the set of parsed ids; an empty set if the path is empty or the file cannot
  /// be opened. Lines that fail to parse are logged and skipped, as is the invalid id.
  ExcludedIdsContainer LoadExcludedIds(std::string const & excludedIdsPath)
  {
    if (excludedIdsPath.empty())
      return {};

    std::ifstream source(excludedIdsPath);
    if (!source)
    {
      LOG(LERROR, ("Error while opening", excludedIdsPath, ":", strerror(errno)));
      return {};
    }

    ExcludedIdsContainer result;
    for (std::string line; std::getline(source, line);)
    {
      ObjectId id{Object::InvalidObjectId()};
      if (!strings::to_any(line, id.Get()))
      {
        LOG(LWARNING, ("Incorrect excluded sponsored id:", line));
        continue;
      }

      if (id != Object::InvalidObjectId())
        result.emplace(id);
    }

    return result;
  }

  /// Replaces the content of the storage with objects parsed from |src|, one object per
  /// line. Objects with an invalid id or with an id from |excludedIds| are dropped.
  /// If an id occurs more than once, the first occurrence is kept.
  void LoadData(std::istream & src, ExcludedIdsContainer const & excludedIds)
  {
    m_objects.clear();
    m_rtree.clear();

    for (std::string line; std::getline(src, line);)
    {
      Object object(line);
      if (object.m_id != Object::InvalidObjectId() &&
          excludedIds.find(object.m_id) == excludedIds.cend())
      {
        m_objects.emplace(object.m_id, object);
      }
    }

    // The index is built only after parsing has finished, from the deduplicated objects.
    for (auto const & item : m_objects)
    {
      auto const & object = item.second;
      // Degenerate box: both corners are the object's (lat, lon) point.
      Box b(Point(object.m_latLon.m_lat, object.m_latLon.m_lon),
            Point(object.m_latLon.m_lat, object.m_latLon.m_lon));
      // Qualified on purpose: a header must not rely on ADL or on using-declarations
      // leaked from other headers to find std::make_pair.
      m_rtree.insert(std::make_pair(b, object.m_id));
    }
  }

  /// @return the object with the given |id|. Fails a CHECK if there is no such object.
  Object const & GetObjectById(ObjectId id) const
  {
    auto const it = m_objects.find(id);
    CHECK(it != m_objects.end(), ("Got wrong object id:", id));
    return it->second;
  }

  /// Mutable overload of GetObjectById(). Fails a CHECK if there is no such object.
  Object & GetObjectById(ObjectId id)
  {
    auto const it = m_objects.find(id);
    CHECK(it != m_objects.end(), ("Got wrong object id:", id));
    return it->second;
  }

  /// @return ids of at most GetMaxSelectedElements() objects that the R-tree reports as
  /// nearest to |latLon|, without those farther than the distance limit (measured with
  /// ms::DistanceOnEarth). A distance limit of 0 disables the distance filtering.
  std::vector<ObjectId> GetNearestObjects(ms::LatLon const & latLon) const
  {
    namespace bgi = boost::geometry::index;

    std::vector<ObjectId> indexes;
    std::for_each(bgi::qbegin(m_rtree, bgi::nearest(Point(latLon.m_lat, latLon.m_lon),
                                                     static_cast<unsigned>(m_maxSelectedElements))),
                  bgi::qend(m_rtree), [this, &latLon, &indexes](Value const & v)
    {
      auto const & object = GetObjectById(v.second);
      double const dist = ms::DistanceOnEarth(latLon, object.m_latLon);
      if (m_distanceLimitMeters != 0.0 && dist > m_distanceLimitMeters)
        return;

      indexes.emplace_back(v.second);
    });

    return indexes;
  }

private:
  // TODO(mgsergio): Get rid of Box since boost::rtree supports point as value type.
  // TODO(mgsergio): Use mercator instead of latlon or boost::geometry::cs::spherical_equatorial
  // instead of boost::geometry::cs::cartesian.
  // Note: coordinates are stored in the index as float (lat, lon) in a cartesian system.
  using Point = boost::geometry::model::point<float, 2, boost::geometry::cs::cartesian>;
  using Box = boost::geometry::model::box<Point>;
  using Value = std::pair<Box, ObjectId>;

  // Create the rtree using default constructor.
  boost::geometry::index::rtree<Value, boost::geometry::index::quadratic<16>> m_rtree;

  ObjectsContainer m_objects;

  double const m_distanceLimitMeters;
  size_t const m_maxSelectedElements;
};
} // namespace generator

View File

@@ -0,0 +1,106 @@
#include "generator/sponsored_scoring.hpp"
#include "indexer/search_string_utils.hpp"
#include "base/math.hpp"
#include <algorithm>
#include <vector>
namespace
{
using WeightedBagOfWords = std::vector<std::pair<strings::UniString, double>>;
// Normalizes and tokenizes |str| and returns the tokens in sorted order.
std::vector<strings::UniString> StringToWords(std::string const & str)
{
  auto words = search::NormalizeAndTokenizeString(str);
  std::sort(words.begin(), words.end());
  return words;
}
// Collapses every run of equal consecutive |words| into a single (word, weight) entry.
// The weight is the run length multiplied by the placeholder score, so for sorted input
// it is the number of occurrences of the word.
WeightedBagOfWords MakeWeightedBagOfWords(std::vector<strings::UniString> const & words)
{
  // TODO(mgsergio): Calculate tf-idf score for every word.
  auto constexpr kTfIdfScorePlaceholder = 1;

  WeightedBagOfWords bag;
  for (auto const & word : words)
  {
    bool const continuesRun = !bag.empty() && bag.back().first == word;
    if (continuesRun)
    {
      // TODO(mgsergio): tf-idf score for bag.back().first.
      bag.back().second += kTfIdfScorePlaceholder;
    }
    else
    {
      bag.emplace_back(word, kTfIdfScorePlaceholder);
    }
  }
  return bag;
}
// Dot product of two bags: the sum of weight products over the words present in both.
// Walks both bags in parallel as in a sorted merge, so the bags must be ordered by word.
double WeightedBagsDotProduct(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
{
  double dot = 0.0;

  auto l = lhs.begin();
  auto r = rhs.begin();
  auto const lEnd = lhs.end();
  auto const rEnd = rhs.end();

  while (l != lEnd && r != rEnd)
  {
    if (l->first < r->first)
    {
      // The left word is absent from |rhs|.
      ++l;
      continue;
    }

    if (l->first == r->first)
    {
      dot += l->second * r->second;
      ++l;
      ++r;
      continue;
    }

    // The right word is absent from |lhs|.
    ++r;
  }

  return dot;
}
// Cosine of the angle between two bags treated as vectors of word weights.
double WeightedBagOfWordsCos(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
{
  auto const product = WeightedBagsDotProduct(lhs, rhs);

  // WeightedBagsDotProduct returns 0.0 if lhs.empty() || rhs.empty() or
  // if every element of either lhs or rhs is 0.0. Returning here also avoids
  // dividing by a zero length.
  if (product == 0.0)
    return 0.0;

  auto const lhsNorm = sqrt(WeightedBagsDotProduct(lhs, lhs));
  auto const rhsNorm = sqrt(WeightedBagsDotProduct(rhs, rhs));
  return product / (lhsNorm * rhsNorm);
}
} // namespace
namespace generator
{
namespace impl
{
// Linearly maps |distance| to a score: 1.0 for a distance of 0 (or less) and 0.0 for
// |maxDistance| (or more). |maxDistance| must not be 0.
double GetLinearNormDistanceScore(double distance, double const maxDistance)
{
  CHECK_NOT_EQUAL(maxDistance, 0.0, ("maxDistance cannot be 0."));

  double const clamped = base::Clamp(distance, 0.0, maxDistance);
  return 1.0 - clamped / maxDistance;
}
// Similarity of two names: the cosine between the weighted bags of words built from their
// normalized tokens. Two names without tokens are considered identical (1.0); if only one
// of them has no tokens the score is 0.0.
double GetNameSimilarityScore(std::string const & booking_name, std::string const & osm_name)
{
  auto const bookingBag = MakeWeightedBagOfWords(StringToWords(booking_name));
  auto const osmBag = MakeWeightedBagOfWords(StringToWords(osm_name));

  bool const bookingEmpty = bookingBag.empty();
  bool const osmEmpty = osmBag.empty();
  if (bookingEmpty || osmEmpty)
    return (bookingEmpty && osmEmpty) ? 1.0 : 0.0;

  return WeightedBagOfWordsCos(bookingBag, osmBag);
}
} // namespace impl
} // namespace generator

View File

@@ -0,0 +1,37 @@
#pragma once
#include <string>
namespace feature
{
class FeatureBuilder;
} // namespace feature
namespace generator
{
namespace impl
{
/// Clamps |distance| to [0, maxDistance] and returns 1 - distance / maxDistance.
/// |maxDistance| must not be 0.
double GetLinearNormDistanceScore(double distance, double maxDistance);
/// Returns the cosine similarity of the bags of words built from the normalized tokens of
/// both names; 1.0 if neither name has tokens, 0.0 if only one of them has none.
double GetNameSimilarityScore(std::string const & booking_name, std::string const & osm_name);
} // namespace impl
namespace sponsored_scoring
{
/// Represents match scoring statistics of a sponsored object against an OSM object.
/// Only declared here; GetMatchingScore() and IsMatched() are defined elsewhere.
template <typename SponsoredObject>
struct MatchStats
{
/// Returns some score based on given fields and classificator tuning.
double GetMatchingScore() const;
/// Returns true if GetMatchingScore is greater than some threshold.
bool IsMatched() const;
// Both scores are value-initialized to 0.0.
// NOTE(review): presumably filled with the results of impl::GetLinearNormDistanceScore
// and impl::GetNameSimilarityScore - confirm in the Match() implementations.
double m_linearNormDistanceScore{};
double m_nameSimilarityScore{};
};
/// Matches a given sponsored object against a given OSM object.
/// @param o   Sponsored object to match.
/// @param fb  OSM object (feature builder) to match against.
/// @return statistics of the match, see MatchStats.
template <typename SponsoredObject>
MatchStats<SponsoredObject> Match(SponsoredObject const & o, feature::FeatureBuilder const & fb);
} // namespace sponsored_scoring
} // namespace generator

View File

@@ -98,6 +98,27 @@ std::unique_ptr<FeatureType> FeatureGetter::GetFeatureByIndex(uint32_t index) co
return m_guard->GetFeatureByIndex(index);
}
// Registers every local map found in the platform's writable directory in |dataSource|.
// Fails a CHECK if registration of a map throws RootException.
void LoadDataSource(DataSource & dataSource)
{
  std::vector<platform::LocalCountryFile> mwmFiles;
  platform::FindAllLocalMapsInDirectoryAndCleanup(GetPlatform().WritableDir(), 0 /* version */,
                                                  -1 /* latestVersion */, mwmFiles);

  for (auto const & mwmFile : mwmFiles)
  {
    LOG(LINFO, ("Found mwm:", mwmFile));

    try
    {
      dataSource.RegisterMap(mwmFile);
    }
    catch (RootException const & e)
    {
      CHECK(false, (e.Msg(), "Bad mwm file:", mwmFile));
    }
  }
}
bool ParseFeatureIdToOsmIdMapping(std::string const & path,
std::unordered_map<uint32_t, base::GeoObjectId> & mapping)
{

View File

@@ -55,6 +55,8 @@ private:
MwmSet::MwmId m_mwmId;
};
// Registers all local maps found in the platform's writable directory in |dataSource|.
void LoadDataSource(DataSource & dataSource);
class FeatureGetter
{
public:

View File

@@ -104,24 +104,26 @@ class StagePreprocess(Stage):
@outer_stage
@depends_from_internal(
D(settings.HOTELS_URL, PathProvider.hotels_path, "p"),
D(settings.HOTELS_URL, PathProvider.hotels_path),
D(settings.PROMO_CATALOG_CITIES_URL, PathProvider.promo_catalog_cities_path, "p"),
D(settings.POPULARITY_URL, PathProvider.popularity_path, "p"),
D(settings.FOOD_URL, PathProvider.food_paths, "p"),
D(settings.FOOD_TRANSLATIONS_URL, PathProvider.food_translations_path, "p"),
)
@test_stage(
Test(st.make_test_booking_data(max_days=7), lambda e, _: e.production, True)
)
# @test_stage(
# Test(st.make_test_booking_data(max_days=7), lambda e, _: e.production, True)
# )
class StageFeatures(Stage):
def apply(self, env: Env):
extra = {}
if is_accepted(env, StageDescriptions):
extra.update({"idToWikidata": env.paths.id_to_wikidata_path})
extra.update({"booking_data": env.paths.hotels_path})
if env.production:
extra.update(
{
"booking_data": env.paths.hotels_path,
"promo_catalog_cities": env.paths.promo_catalog_cities_path,
"popular_places_data": env.paths.popularity_path,
"brands_data": env.paths.food_paths,