mirror of
https://codeberg.org/comaps/comaps
synced 2025-12-19 13:03:36 +00:00
Compare commits
3 Commits
6a20269819
...
generator-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6368ba477f | ||
|
|
e7c04c5459 | ||
|
|
556a474fda |
@@ -9,6 +9,8 @@ set(SRC
|
||||
affiliation.hpp
|
||||
altitude_generator.cpp
|
||||
altitude_generator.hpp
|
||||
booking_dataset.cpp
|
||||
booking_dataset.hpp
|
||||
borders.cpp
|
||||
borders.hpp
|
||||
boundary_postcodes_enricher.cpp
|
||||
@@ -122,6 +124,8 @@ set(SRC
|
||||
isolines_generator.hpp
|
||||
isolines_section_builder.cpp
|
||||
isolines_section_builder.hpp
|
||||
kayak_dataset.cpp
|
||||
kayak_dataset.hpp
|
||||
maxspeeds_builder.cpp
|
||||
maxspeeds_builder.hpp
|
||||
maxspeeds_collector.cpp
|
||||
@@ -196,6 +200,12 @@ set(SRC
|
||||
routing_world_roads_generator.hpp
|
||||
search_index_builder.cpp
|
||||
search_index_builder.hpp
|
||||
sponsored_dataset.hpp
|
||||
sponsored_dataset_inl.hpp
|
||||
sponsored_object_base.hpp
|
||||
sponsored_object_storage.hpp
|
||||
sponsored_scoring.cpp
|
||||
sponsored_scoring.hpp
|
||||
srtm_parser.cpp
|
||||
srtm_parser.hpp
|
||||
statistics.cpp
|
||||
@@ -274,5 +284,6 @@ omim_add_tool_subdirectory(generator_tool)
|
||||
#omim_add_tool_subdirectory(complex_generator)
|
||||
omim_add_tool_subdirectory(feature_segments_checker)
|
||||
omim_add_tool_subdirectory(srtm_coverage_checker)
|
||||
omim_add_tool_subdirectory(booking_quality_check)
|
||||
add_subdirectory(world_roads_builder)
|
||||
add_subdirectory(address_parser)
|
||||
|
||||
180
generator/booking_dataset.cpp
Normal file
180
generator/booking_dataset.cpp
Normal file
@@ -0,0 +1,180 @@
|
||||
#include "generator/booking_dataset.hpp"
|
||||
#include "generator/feature_builder.hpp"
|
||||
|
||||
#include "indexer/classificator.hpp"
|
||||
#include "indexer/ftypes_matcher.hpp"
|
||||
|
||||
#include "geometry/mercator.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include "boost/algorithm/string/replace.hpp"
|
||||
|
||||
|
||||
namespace generator
|
||||
{
|
||||
using namespace feature;
|
||||
|
||||
// BookingHotel ------------------------------------------------------------------------------------
|
||||
/// Builds a hotel from one tab-separated record of the booking data file.
/// @param src  A single line with exactly Fields::Counter tab-separated columns,
///             in the order declared by the Fields enum.
/// A wrong column count fails the CHECK_EQUAL below. Individual numeric fields that fail
/// to parse are only reported via CLOG(LDEBUG, ...) and keep their default value.
BookingHotel::BookingHotel(std::string src)
{
  /// @todo For fast parsing we can preprocess src (quotes) and return string_view's.
  std::vector<std::string> rec;
  strings::ParseCSVRow(src, '\t', rec);

  CHECK_EQUAL(rec.size(), Fields::Counter,
              ("Error parsing hotels entry:", boost::replace_all_copy(src, "\t", "\\t")));

  // Assign id in the end in case of possible errors.
  // The value is initialized and the parse result is remembered, so that a malformed id
  // column never leads to reading an indeterminate value when the id is stored.
  uint32_t id = 0;
  bool const isIdParsed = strings::to_uint(rec[Fields::Id], id);
  CLOG(LDEBUG, isIdParsed, ("Can't parse hotel id:", rec[Fields::Id]));
  CLOG(LDEBUG, strings::to_double(rec[Fields::Latitude], m_latLon.m_lat), ());
  CLOG(LDEBUG, strings::to_double(rec[Fields::Longitude], m_latLon.m_lon), ());

  m_name = rec[Fields::Name];
  m_address = rec[Fields::Address];

  CLOG(LDEBUG, strings::to_uint(rec[Fields::Stars], m_stars), ());
  CLOG(LDEBUG, strings::to_uint(rec[Fields::PriceCategory], m_priceCategory), ());
  CLOG(LDEBUG, strings::to_double(rec[Fields::RatingBooking], m_ratingBooking), ());
  CLOG(LDEBUG, strings::to_double(rec[Fields::RatingUsers], m_ratingUser), ());

  m_descUrl = rec[Fields::DescUrl];

  CLOG(LDEBUG, strings::to_uint(rec[Fields::Type], m_type), ());

  m_translations = rec[Fields::Translations];

  // NOTE(review): when the id is malformed m_id keeps whatever the base class initialized
  // it with (presumably the invalid id) - confirm against SponsoredObjectBase.
  if (isIdParsed)
    m_id.Set(id);
}
|
||||
|
||||
|
||||
// BookingDataset ----------------------------------------------------------------------------------
|
||||
template <>
|
||||
bool BookingDataset::IsSponsoredCandidate(FeatureBuilder const & fb) const
|
||||
{
|
||||
if (fb.GetName(StringUtf8Multilang::kDefaultCode).empty())
|
||||
return false;
|
||||
|
||||
return ftypes::IsHotelChecker::Instance()(fb.GetTypes());
|
||||
}
|
||||
|
||||
template <>
|
||||
void BookingDataset::PreprocessMatchedOsmObject(ObjectId, FeatureBuilder & fb, FBuilderFnT const fn) const
|
||||
{
|
||||
// Turn a hotel into a simple building.
|
||||
if (fb.GetGeomType() == GeomType::Area)
|
||||
{
|
||||
// Remove all information about the hotel.
|
||||
auto & meta = fb.GetMetadata();
|
||||
meta.Drop(Metadata::EType::FMD_STARS);
|
||||
meta.Drop(Metadata::EType::FMD_WEBSITE);
|
||||
meta.Drop(Metadata::EType::FMD_PHONE_NUMBER);
|
||||
|
||||
auto & params = fb.GetParams();
|
||||
params.ClearName();
|
||||
|
||||
auto const tourism = classif().GetTypeByPath({"tourism"});
|
||||
base::EraseIf(params.m_types, [tourism](uint32_t type)
|
||||
{
|
||||
ftype::TruncValue(type, 1);
|
||||
return type == tourism;
|
||||
});
|
||||
}
|
||||
|
||||
fn(fb);
|
||||
}
|
||||
|
||||
template <>
|
||||
void BookingDataset::BuildObject(Object const & hotel, FBuilderFnT const & fn) const
|
||||
{
|
||||
FeatureBuilder fb;
|
||||
|
||||
fb.SetCenter(mercator::FromLatLon(hotel.m_latLon.m_lat, hotel.m_latLon.m_lon));
|
||||
|
||||
/// @todo SRC_BOOKING
|
||||
//fb.SetHotelInfo(Metadata::SRC_KAYAK, hotel.m_id.Get(), hotel.m_ratingUser, hotel.m_priceCategory);
|
||||
auto & metadata = fb.GetMetadata();
|
||||
metadata.Set(Metadata::FMD_WEBSITE, hotel.m_descUrl);
|
||||
metadata.Set(Metadata::FMD_STARS, strings::to_string(hotel.m_stars));
|
||||
|
||||
auto & params = fb.GetParams();
|
||||
if (!hotel.m_street.empty())
|
||||
params.SetStreet(hotel.m_street);
|
||||
|
||||
if (!hotel.m_houseNumber.empty())
|
||||
params.AddHouseNumber(hotel.m_houseNumber);
|
||||
|
||||
if (!hotel.m_translations.empty())
|
||||
{
|
||||
// TODO(mgsergio): Move parsing to the hotel costruction stage.
|
||||
std::vector<std::string> parts;
|
||||
strings::ParseCSVRow(hotel.m_translations, '|', parts);
|
||||
CHECK_EQUAL(parts.size() % 3, 0, ("Invalid translation string:", hotel.m_translations));
|
||||
for (size_t i = 0; i < parts.size(); i += 3)
|
||||
{
|
||||
auto const langCode = StringUtf8Multilang::GetLangIndex(parts[i]);
|
||||
params.AddName(StringUtf8Multilang::GetLangByCode(langCode), parts[i + 1]);
|
||||
// TODO(mgsergio): e.AddTag("addr:full:" + parts[i], parts[i + 2]);
|
||||
}
|
||||
}
|
||||
params.AddName(StringUtf8Multilang::GetLangByCode(StringUtf8Multilang::kEnglishCode), hotel.m_name);
|
||||
|
||||
auto const & clf = classif();
|
||||
params.AddType(clf.GetTypeByPath({"sponsored", "booking"}));
|
||||
// Matching booking.com hotel types to OpenStreetMap values.
|
||||
// Booking types are listed in the closed API docs.
|
||||
switch (hotel.m_type)
|
||||
{
|
||||
case 19:
|
||||
case 205: params.AddType(clf.GetTypeByPath({"tourism", "motel"})); break;
|
||||
|
||||
case 21:
|
||||
case 206:
|
||||
case 212: params.AddType(clf.GetTypeByPath({"tourism", "resort"})); break;
|
||||
|
||||
case 3:
|
||||
case 23:
|
||||
case 24:
|
||||
case 25:
|
||||
case 202:
|
||||
case 207:
|
||||
case 208:
|
||||
case 209:
|
||||
case 210:
|
||||
case 216:
|
||||
case 220:
|
||||
case 223: params.AddType(clf.GetTypeByPath({"tourism", "guest_house"})); break;
|
||||
|
||||
case 14:
|
||||
case 204:
|
||||
case 213:
|
||||
case 218:
|
||||
case 219:
|
||||
case 226:
|
||||
case 222: params.AddType(clf.GetTypeByPath({"tourism", "hotel"})); break;
|
||||
|
||||
case 211:
|
||||
case 224:
|
||||
case 228: params.AddType(clf.GetTypeByPath({"tourism", "chalet"})); break;
|
||||
|
||||
case 13:
|
||||
case 225:
|
||||
case 203: params.AddType(clf.GetTypeByPath({"tourism", "hostel"})); break;
|
||||
|
||||
case 215:
|
||||
case 221:
|
||||
case 227:
|
||||
case 2:
|
||||
case 201: params.AddType(clf.GetTypeByPath({"tourism", "apartment"})); break;
|
||||
|
||||
case 214: params.AddType(clf.GetTypeByPath({"tourism", "camp_site"})); break;
|
||||
|
||||
default: params.AddType(clf.GetTypeByPath({"tourism", "hotel"})); break;
|
||||
}
|
||||
|
||||
fn(fb);
|
||||
}
|
||||
|
||||
} // namespace generator
|
||||
45
generator/booking_dataset.hpp
Normal file
45
generator/booking_dataset.hpp
Normal file
@@ -0,0 +1,45 @@
|
||||
#pragma once
|
||||
|
||||
#include "generator/sponsored_dataset.hpp"
|
||||
#include "generator/sponsored_object_base.hpp"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace generator
|
||||
{
|
||||
class BookingHotel : public SponsoredObjectBase
|
||||
{
|
||||
enum Fields
|
||||
{
|
||||
Id = 0,
|
||||
Latitude,
|
||||
Longitude,
|
||||
Name,
|
||||
Address,
|
||||
Stars,
|
||||
PriceCategory,
|
||||
RatingBooking,
|
||||
RatingUsers,
|
||||
DescUrl,
|
||||
Type,
|
||||
Translations,
|
||||
|
||||
Counter
|
||||
};
|
||||
|
||||
public:
|
||||
explicit BookingHotel(std::string src);
|
||||
|
||||
static constexpr size_t FieldsCount() { return Fields::Counter; }
|
||||
|
||||
uint32_t m_stars = 0;
|
||||
uint32_t m_priceCategory = 0;
|
||||
double m_ratingBooking = 0.0;
|
||||
double m_ratingUser = 0.0;
|
||||
uint32_t m_type = 0;
|
||||
std::string m_translations;
|
||||
std::string m_descUrl;
|
||||
};
|
||||
|
||||
using BookingDataset = SponsoredDataset<BookingHotel>;
|
||||
} // namespace generator
|
||||
11
generator/booking_quality_check/CMakeLists.txt
Normal file
11
generator/booking_quality_check/CMakeLists.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
# Command-line tool built from booking_quality_check.cpp and linked against the generator library.
project(booking_quality_check)

# booking_addr_match.cpp defines its own main(), so only one of the two sources can be active.
set(SRC
  booking_quality_check.cpp
)
#set(SRC booking_addr_match.cpp)

omim_add_executable(${PROJECT_NAME} ${SRC})

target_link_libraries(${PROJECT_NAME} generator gflags::gflags)
|
||||
96
generator/booking_quality_check/booking_addr_match.cpp
Normal file
96
generator/booking_quality_check/booking_addr_match.cpp
Normal file
@@ -0,0 +1,96 @@
|
||||
#include "generator/booking_dataset.hpp"
|
||||
#include "generator/utils.hpp"
|
||||
|
||||
#include "search/reverse_geocoder.hpp"
|
||||
|
||||
#include "indexer/data_source.hpp"
|
||||
|
||||
#include "geometry/mercator.hpp"
|
||||
|
||||
#include "platform/platform.hpp"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <gflags/gflags.h>
|
||||
|
||||
|
||||
// Command-line flags of the address matching tool.

// Input data and directories.
DEFINE_string(booking_data, "", "Path to booking data in .tsv format");
DEFINE_string(user_resource_path, "", "Path to data directory (resources dir)");
DEFINE_string(data_path, "", "Path to mwm files (writable dir)");

// NOTE(review): the two flags below are not referenced anywhere in the visible part of this file.
DEFINE_string(locale, "en", "Locale of all the search queries");
DEFINE_int32(num_threads, 1, "Number of search engine threads");
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
class AddressMatcher
|
||||
{
|
||||
public:
|
||||
AddressMatcher()
|
||||
{
|
||||
LoadDataSource(m_dataSource);
|
||||
m_coder = std::make_unique<search::ReverseGeocoder>(m_dataSource);
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
void operator()(SponsoredObject & object)
|
||||
{
|
||||
search::ReverseGeocoder::Address addr;
|
||||
m_coder->GetNearbyAddress(mercator::FromLatLon(object.m_latLon), addr);
|
||||
object.m_street = addr.GetStreetName();
|
||||
object.m_houseNumber = addr.GetHouseNumber();
|
||||
}
|
||||
|
||||
private:
|
||||
FrozenDataSource m_dataSource;
|
||||
std::unique_ptr<search::ReverseGeocoder> m_coder;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
gflags::SetUsageMessage(
|
||||
"Takes OSM XML data from stdin and creates"
|
||||
" data and index files in several passes.");
|
||||
gflags::ParseCommandLineFlags(&argc, &argv, true);
|
||||
|
||||
Platform & platform = GetPlatform();
|
||||
|
||||
if (!FLAGS_user_resource_path.empty())
|
||||
platform.SetResourceDir(FLAGS_user_resource_path);
|
||||
|
||||
if (!FLAGS_data_path.empty())
|
||||
platform.SetWritableDirForTests(FLAGS_data_path);
|
||||
|
||||
LOG(LINFO, ("writable dir =", platform.WritableDir()));
|
||||
LOG(LINFO, ("resources dir =", platform.ResourcesDir()));
|
||||
|
||||
LOG_SHORT(LINFO, ("Booking data:", FLAGS_booking_data));
|
||||
|
||||
generator::BookingDataset bookingDataset(FLAGS_booking_data);
|
||||
AddressMatcher addressMatcher;
|
||||
|
||||
size_t matchedNum = 0;
|
||||
size_t emptyAddr = 0;
|
||||
auto const & storage = bookingDataset.GetStorage();
|
||||
for (auto [_, hotel] : storage.GetObjects())
|
||||
{
|
||||
addressMatcher(hotel);
|
||||
|
||||
if (hotel.m_address.empty())
|
||||
++emptyAddr;
|
||||
|
||||
if (hotel.HasAddresParts())
|
||||
{
|
||||
++matchedNum;
|
||||
std::cout << "Hotel: " << hotel.m_address << " AddLoc: " << hotel.m_translations << " --> "
|
||||
<< hotel.m_street << " " << hotel.m_houseNumber << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Num of hotels: " << storage.Size() << " matched: " << matchedNum
|
||||
<< " Empty addresses: " << emptyAddr << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
435
generator/booking_quality_check/booking_quality_check.cpp
Normal file
435
generator/booking_quality_check/booking_quality_check.cpp
Normal file
@@ -0,0 +1,435 @@
|
||||
//#include "generator/booking_dataset.hpp"
|
||||
#include "generator/feature_builder.hpp"
|
||||
#include "generator/feature_maker.hpp"
|
||||
//#include "generator/opentable_dataset.hpp"
|
||||
#include "generator/kayak_dataset.hpp"
|
||||
#include "generator/osm_source.hpp"
|
||||
#include "generator/raw_generator.hpp"
|
||||
#include "generator/sponsored_dataset_inl.hpp"
|
||||
#include "generator/translator.hpp"
|
||||
|
||||
#include "indexer/classificator_loader.hpp"
|
||||
|
||||
#include "base/file_name_utils.hpp"
|
||||
#include "base/exception.hpp"
|
||||
#include "base/geo_object_id.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include <random>
|
||||
#include <sstream>
|
||||
|
||||
#include <gflags/gflags.h>
|
||||
|
||||
#include "boost/range/adaptor/map.hpp"
|
||||
#include "boost/range/algorithm/copy.hpp"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Command-line flags of the quality check tool.

// Input and output files.
DEFINE_string(osm, "", "Input .o5m file");
DEFINE_string(booking, "", "Path to booking data in .tsv format");
DEFINE_string(opentable, "", "Path to opentable data in .tsv format");
DEFINE_string(factors, "", "Factors output path");
// Written when -generate is set, read otherwise.
DEFINE_string(sample, "", "Path to sample file");

// Sample generation parameters.
DEFINE_uint64(seed, minstd_rand::default_seed, "Seed for random shuffle");
DEFINE_uint64(selection_size, 10000, "Selection size");
DEFINE_bool(generate, false, "Generate unmarked sample");
|
||||
|
||||
using namespace generator;
|
||||
using namespace feature;
|
||||
|
||||
namespace
|
||||
{
|
||||
/// Renders a one-line, tab-separated description of |fb|: most generic OSM id, default
/// name, params, key point as lat/lon and the geometry type.
/// Only point and area features are expected; any other geometry fails a CHECK.
string PrintBuilder(FeatureBuilder const & fb)
{
  ostringstream out;

  out << "Id: " << DebugPrint(fb.GetMostGenericOsmId()) << '\t';
  out << "Name: " << fb.GetName(StringUtf8Multilang::kDefaultCode) << '\t';
  out << "Params: " << DebugPrint(fb.GetParams()) << '\t';

  auto const ll = mercator::ToLatLon(fb.GetKeyPoint());
  out << "lat: " << ll.m_lat << " lon: " << ll.m_lon << '\t';

  switch (fb.GetGeomType())
  {
  case GeomType::Point: out << "GeomType: Point"; break;
  case GeomType::Area: out << "GeomType: Area"; break;
  default: CHECK(false, ());
  }

  return out.str();
}
|
||||
|
||||
DECLARE_EXCEPTION(ParseError, RootException);
|
||||
|
||||
/// Parses a string of the form "<type> <id>", where type is "node", "way" or "relation".
/// @throws ParseError when the string can't be read or the type is unknown.
base::GeoObjectId ReadDebuggedPrintedOsmId(string const & str)
{
  istringstream input(str);
  string kind;
  uint64_t value;
  input >> kind >> value;

  if (!input.fail())
  {
    if (kind == "node")
      return base::MakeOsmNode(value);
    if (kind == "way")
      return base::MakeOsmWay(value);
    if (kind == "relation")
      return base::MakeOsmRelation(value);
  }

  // Reached both for unreadable input and for an unknown element type.
  MYTHROW(ParseError, ("Can't make osmId from string", str));
}
|
||||
|
||||
/// Builds the generator settings from the command-line flags: hotels path, the o5m input
/// file, "map" node storage, and the input file's directory as cache and intermediate dir.
GenerateInfo GetGenerateInfo()
{
  GenerateInfo info;

  info.m_hotelsPath = FLAGS_booking;
  //info.m_opentableDataFilename = FLAGS_opentable;
  info.m_osmFileName = FLAGS_osm;

  info.SetNodeStorageType("map");
  info.SetOsmFileType("o5m");

  // Cache and intermediate data live next to the OSM input file.
  auto const osmDir = base::GetDirectory(FLAGS_osm);
  info.m_intermediateDir = osmDir;
  info.m_cacheDir = info.m_intermediateDir;

  // Set other info params here.

  return info;
}
|
||||
|
||||
template <typename Object>
|
||||
struct SampleItem
|
||||
{
|
||||
enum MatchStatus {Uninitialized, Yes, No};
|
||||
using ObjectId = typename Object::ObjectId;
|
||||
|
||||
SampleItem() = default;
|
||||
|
||||
SampleItem(base::GeoObjectId const & osmId, ObjectId const sponsoredId,
|
||||
MatchStatus match = Uninitialized)
|
||||
: m_osmId(osmId), m_sponsoredId(sponsoredId), m_match(match)
|
||||
{
|
||||
}
|
||||
|
||||
base::GeoObjectId m_osmId;
|
||||
ObjectId m_sponsoredId = Object::InvalidObjectId();
|
||||
|
||||
MatchStatus m_match = Uninitialized;
|
||||
};
|
||||
|
||||
template <typename Object>
|
||||
typename SampleItem<Object>::MatchStatus ReadMatchStatus(string_view str)
|
||||
{
|
||||
if (str == "Yes")
|
||||
return SampleItem<Object>::Yes;
|
||||
|
||||
if (str == "No")
|
||||
return SampleItem<Object>::No;
|
||||
|
||||
if (str == "Uninitialized")
|
||||
return SampleItem<Object>::Uninitialized;
|
||||
|
||||
MYTHROW(ParseError, ("Can't make SampleItem::MatchStatus from string:", str));
|
||||
}
|
||||
|
||||
template <typename Object>
|
||||
SampleItem<Object> ReadSampleItem(string const & str)
|
||||
{
|
||||
SampleItem<Object> item;
|
||||
|
||||
auto const parts = strings::Tokenize(str, "\t");
|
||||
CHECK_EQUAL(parts.size(), 3, ("Cant't make SampleItem from string:", str,
|
||||
"due to wrong number of fields."));
|
||||
|
||||
item.m_osmId = ReadDebuggedPrintedOsmId(string(parts[0]));
|
||||
if (!strings::to_uint(parts[1], item.m_sponsoredId.Get()))
|
||||
MYTHROW(ParseError, ("Can't make uint32 from string:", parts[1]));
|
||||
item.m_match = ReadMatchStatus<Object>(parts[2]);
|
||||
|
||||
return item;
|
||||
}
|
||||
|
||||
template <typename Object>
|
||||
vector<SampleItem<Object>> ReadSample(istream & ist)
|
||||
{
|
||||
vector<SampleItem<Object>> result;
|
||||
|
||||
size_t lineNumber = 1;
|
||||
try
|
||||
{
|
||||
for (string line; getline(ist, line); ++lineNumber)
|
||||
{
|
||||
result.emplace_back(ReadSampleItem<Object>(line));
|
||||
}
|
||||
}
|
||||
catch (ParseError const & e)
|
||||
{
|
||||
LOG_SHORT(LERROR, ("Wrong format: line", lineNumber, e.Msg()));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename Object>
|
||||
vector<SampleItem<Object>> ReadSampleFromFile(string const & name)
|
||||
{
|
||||
ifstream ist(name);
|
||||
CHECK(ist.is_open(), ("Can't open file:", name, strerror(errno)));
|
||||
return ReadSample<Object>(ist);
|
||||
}
|
||||
|
||||
void PrintOsmUrl(std::ostream & os, ms::LatLon const & ll)
|
||||
{
|
||||
os << "# URL: https://www.openstreetmap.org/?mlat=" << ll.m_lat << "&mlon=" << ll.m_lon
|
||||
<< "#map=18/" << ll.m_lat << "/" << ll.m_lon << endl;
|
||||
};
|
||||
|
||||
template <typename Dataset, typename Object = typename Dataset::Object>
|
||||
void GenerateFactors(Dataset const & dataset,
|
||||
map<base::GeoObjectId, FeatureBuilder> const & features,
|
||||
vector<SampleItem<Object>> const & sampleItems, ostream & ost)
|
||||
{
|
||||
ost << fixed << setprecision(6);
|
||||
|
||||
for (auto const & item : sampleItems)
|
||||
{
|
||||
auto const & object = dataset.GetStorage().GetObjectById(item.m_sponsoredId);
|
||||
auto const & feature = features.at(item.m_osmId);
|
||||
|
||||
auto const score = dataset.CalcScore(object, feature);
|
||||
|
||||
ost << "# ------------------------------------------" << endl;
|
||||
ost << (score.IsMatched() ? "YES" : "NO") << "\t" << DebugPrint(feature.GetMostGenericOsmId())
|
||||
<< "\t" << object.m_id
|
||||
<< "\tdistance: " << score.m_distance
|
||||
<< "\tdistance score: " << score.m_linearNormDistanceScore
|
||||
<< "\tname score: " << score.m_nameSimilarityScore
|
||||
<< "\tresult score: " << score.GetMatchingScore()
|
||||
<< endl;
|
||||
ost << "# " << PrintBuilder(feature) << endl;
|
||||
ost << "# " << object << endl;
|
||||
PrintOsmUrl(ost, object.m_latLon);
|
||||
}
|
||||
}
|
||||
|
||||
/// Kind of sponsored dataset selected on the command line.
enum class DatasetType
{
  Booking,   // chosen when --booking is set
  Opentable  // chosen when --booking is empty
};
|
||||
|
||||
template <typename Dataset, typename Object = typename Dataset::Object>
|
||||
void GenerateSample(Dataset const & dataset,
|
||||
map<base::GeoObjectId, FeatureBuilder> const & features, ostream & ost)
|
||||
{
|
||||
LOG_SHORT(LINFO, ("Num of elements:", features.size()));
|
||||
vector<base::GeoObjectId> elementIndexes(features.size());
|
||||
boost::copy(features | boost::adaptors::map_keys, begin(elementIndexes));
|
||||
|
||||
// TODO(mgsergio): Try RandomSample (from search:: at the moment of writing).
|
||||
shuffle(elementIndexes.begin(), elementIndexes.end(), minstd_rand(static_cast<uint32_t>(FLAGS_seed)));
|
||||
if (FLAGS_selection_size < elementIndexes.size())
|
||||
elementIndexes.resize(FLAGS_selection_size);
|
||||
|
||||
ost << fixed << setprecision(6);
|
||||
for (auto osmId : elementIndexes)
|
||||
{
|
||||
auto const & fb = features.at(osmId);
|
||||
auto const ll = mercator::ToLatLon(fb.GetKeyPoint());
|
||||
auto const sponsoredIndexes = dataset.GetStorage().GetNearestObjects(ll);
|
||||
|
||||
ost << "# ------------------------------------------" << endl
|
||||
<< "# " << PrintBuilder(fb) << endl;
|
||||
PrintOsmUrl(ost, ll);
|
||||
|
||||
for (auto const sponsoredId : sponsoredIndexes)
|
||||
{
|
||||
auto const & object = dataset.GetStorage().GetObjectById(sponsoredId);
|
||||
auto const score = dataset.CalcScore(object, fb);
|
||||
|
||||
ost << (score.IsMatched() ? "YES" : "NO") << "\t" << sponsoredId
|
||||
<< "\tdistance: " << score.m_distance
|
||||
<< "\tdistance score: " << score.m_linearNormDistanceScore
|
||||
<< "\tname score: " << score.m_nameSimilarityScore
|
||||
<< "\tresult score: " << score.GetMatchingScore()
|
||||
<< endl
|
||||
<< "# " << object << endl;
|
||||
PrintOsmUrl(ost, object.m_latLon);
|
||||
}
|
||||
|
||||
ost << endl;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Dataset>
|
||||
std::unique_ptr<Dataset> CreateDataset(GenerateInfo const & info);
|
||||
|
||||
template <>
|
||||
std::unique_ptr<KayakDataset> CreateDataset<KayakDataset>(GenerateInfo const & info)
|
||||
{
|
||||
return std::make_unique<KayakDataset>(base::JoinPath(info.m_hotelsPath, "hotels.csv"),
|
||||
base::JoinPath(info.m_hotelsPath, "placefeed.csv"));
|
||||
}
|
||||
|
||||
//template <>
|
||||
//string GetDatasetFilePath<OpentableDataset>(GenerateInfo const & info)
|
||||
//{
|
||||
// return info.m_opentableDataFilename;
|
||||
//}
|
||||
|
||||
class TranslatorMock : public Translator
|
||||
{
|
||||
public:
|
||||
TranslatorMock(std::shared_ptr<FeatureProcessorInterface> const & processor,
|
||||
std::shared_ptr<generator::cache::IntermediateData> const & cache)
|
||||
: Translator(processor, cache, std::make_shared<FeatureMakerSimple>(cache->GetCache()))
|
||||
{
|
||||
}
|
||||
|
||||
/// @name TranslatorInterface overrides.
|
||||
/// @{
|
||||
std::shared_ptr<TranslatorInterface> Clone() const override
|
||||
{
|
||||
UNREACHABLE();
|
||||
return nullptr;
|
||||
}
|
||||
void Merge(TranslatorInterface const &) override
|
||||
{
|
||||
UNREACHABLE();
|
||||
}
|
||||
/// @}
|
||||
};
|
||||
|
||||
class AggregateProcessor : public FeatureProcessorInterface
|
||||
{
|
||||
public:
|
||||
/// @name FeatureProcessorInterface overrides.
|
||||
/// @{
|
||||
std::shared_ptr<FeatureProcessorInterface> Clone() const override
|
||||
{
|
||||
UNREACHABLE();
|
||||
return nullptr;
|
||||
}
|
||||
void Process(feature::FeatureBuilder & fb) override
|
||||
{
|
||||
auto const id = fb.GetMostGenericOsmId();
|
||||
m_features.emplace(id, std::move(fb));
|
||||
}
|
||||
void Finish() override {}
|
||||
/// @}
|
||||
|
||||
std::map<base::GeoObjectId, feature::FeatureBuilder> m_features;
|
||||
};
|
||||
|
||||
template <class Dataset> class DatasetFilter : public FilterInterface
|
||||
{
|
||||
Dataset const & m_dataset;
|
||||
public:
|
||||
DatasetFilter(Dataset const & dataset) : m_dataset(dataset) {}
|
||||
|
||||
/// @name FilterInterface overrides.
|
||||
/// @{
|
||||
std::shared_ptr<FilterInterface> Clone() const override
|
||||
{
|
||||
UNREACHABLE();
|
||||
return nullptr;
|
||||
}
|
||||
bool IsAccepted(OsmElement const & e) const override
|
||||
{
|
||||
// All hotels under tourism tag.
|
||||
return !e.GetTag("tourism").empty();
|
||||
}
|
||||
bool IsAccepted(feature::FeatureBuilder const & fb) const override
|
||||
{
|
||||
return m_dataset.IsSponsoredCandidate(fb);
|
||||
}
|
||||
/// @}
|
||||
};
|
||||
|
||||
template <typename Dataset, typename Object = typename Dataset::Object>
|
||||
void RunImpl(GenerateInfo & info)
|
||||
{
|
||||
auto dataset = CreateDataset<Dataset>(info);
|
||||
LOG_SHORT(LINFO, (dataset->GetStorage().Size(), "objects are loaded"));
|
||||
|
||||
LOG_SHORT(LINFO, ("OSM data:", FLAGS_osm));
|
||||
|
||||
generator::cache::IntermediateDataObjectsCache objectsCache;
|
||||
auto cache = std::make_shared<generator::cache::IntermediateData>(objectsCache, info);
|
||||
auto processor = make_shared<AggregateProcessor>();
|
||||
auto translator = std::make_shared<TranslatorMock>(processor, cache);
|
||||
translator->SetFilter(std::make_shared<DatasetFilter<Dataset>>(*dataset));
|
||||
|
||||
RawGenerator generator(info);
|
||||
generator.GenerateCustom(translator);
|
||||
CHECK(generator.Execute(), ());
|
||||
|
||||
if (FLAGS_generate)
|
||||
{
|
||||
ostream * ost = &cout;
|
||||
unique_ptr<ofstream> ofst;
|
||||
if (!FLAGS_sample.empty())
|
||||
{
|
||||
ofst = std::make_unique<ofstream>(FLAGS_sample);
|
||||
CHECK(ofst->is_open(), ("Can't open file", FLAGS_sample, strerror(errno)));
|
||||
ost = ofst.get();
|
||||
}
|
||||
GenerateSample(*dataset, processor->m_features, *ost);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto const sample = ReadSampleFromFile<Object>(FLAGS_sample);
|
||||
LOG_SHORT(LINFO, ("Sample size is", sample.size()));
|
||||
ofstream ost(FLAGS_factors);
|
||||
CHECK(ost.is_open(), ("Can't open file", FLAGS_factors, strerror(errno)));
|
||||
GenerateFactors(*dataset, processor->m_features, sample, ost);
|
||||
}
|
||||
}
|
||||
|
||||
/// Dispatches to RunImpl for the requested dataset type.
/// The booking type is served by KayakDataset. The opentable dataset is disabled in this
/// file, so requesting it fails a CHECK instead of silently doing nothing.
void Run(DatasetType const datasetType, GenerateInfo & info)
{
  switch (datasetType)
  {
  case DatasetType::Booking: RunImpl<KayakDataset>(info); break;
  case DatasetType::Opentable:
    //RunImpl<OpentableDataset>(info);
    CHECK(false, ("Opentable dataset is not supported."));
    break;
  }
}
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
gflags::SetUsageMessage("Calculates factors for given samples.");
|
||||
|
||||
if (argc == 1)
|
||||
{
|
||||
gflags::ShowUsageWithFlags(argv[0]);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
gflags::ParseCommandLineFlags(&argc, &argv, true);
|
||||
|
||||
CHECK(!FLAGS_sample.empty(), ("Please specify sample path."));
|
||||
CHECK(!FLAGS_osm.empty(), ("Please specify osm path."));
|
||||
CHECK(!FLAGS_booking.empty() || !FLAGS_opentable.empty(),
|
||||
("Please specify either booking or opentable path."));
|
||||
CHECK(!FLAGS_factors.empty() || FLAGS_generate, ("Please either specify factors path"
|
||||
"or use -generate."));
|
||||
|
||||
auto const datasetType = FLAGS_booking.empty() ? DatasetType::Opentable : DatasetType::Booking;
|
||||
|
||||
classificator::Load();
|
||||
|
||||
auto info = GetGenerateInfo();
|
||||
GenerateIntermediateData(info);
|
||||
|
||||
Run(datasetType, info);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
#include "routing/routing_helpers.hpp"
|
||||
|
||||
#include "indexer/custom_keyvalue.hpp"
|
||||
#include "indexer/feature_algo.hpp"
|
||||
#include "indexer/feature_visibility.hpp"
|
||||
#include "indexer/ftypes_matcher.hpp"
|
||||
@@ -593,6 +594,32 @@ size_t FeatureBuilder::GetPointsCount() const
|
||||
return counter;
|
||||
}
|
||||
|
||||
void FeatureBuilder::SetHotelInfo(std::string uri, double rating)
|
||||
{
|
||||
// Normalize rating [0, 100]
|
||||
if (rating < 0 || rating > 10)
|
||||
rating = 0;
|
||||
else
|
||||
rating *= 10;
|
||||
|
||||
auto & meta = GetMetadata();
|
||||
auto const append = [src = Metadata::ESource::SRC_KAYAK, &meta](Metadata::EType type, auto val)
|
||||
{
|
||||
indexer::CustomKeyValue kv(meta.Get(type));
|
||||
kv.Add(src, val);
|
||||
meta.Set(type, kv.ToString());
|
||||
};
|
||||
|
||||
meta.Set(Metadata::FMD_EXTERNAL_URI, std::move(uri));
|
||||
|
||||
// append(Metadata::FMD_CUSTOM_IDS, id);
|
||||
|
||||
if (rating > 0)
|
||||
append(Metadata::FMD_RATINGS, static_cast<uint8_t>(std::round(rating)));
|
||||
// if (priceCategory > 0)
|
||||
// append(Metadata::FMD_PRICE_RATES, priceCategory);
|
||||
}
|
||||
|
||||
bool FeatureBuilder::IsDrawableInRange(int lowScale, int highScale) const
|
||||
{
|
||||
auto const types = GetTypesHolder();
|
||||
|
||||
@@ -159,6 +159,8 @@ public:
|
||||
Metadata const & GetMetadata() const { return m_params.GetMetadata(); }
|
||||
Metadata & GetMetadata() { return m_params.GetMetadata(); }
|
||||
|
||||
void SetHotelInfo(std::string uri, double rating);
|
||||
|
||||
// To work with types and names based on drawing.
|
||||
// Check classificator types for their compatibility with feature geometry type.
|
||||
// Need to call when using any classificator types manipulating.
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
#include "generator/addresses_collector.hpp"
|
||||
#include "generator/address_enricher.hpp"
|
||||
#include "generator/affiliation.hpp"
|
||||
//#include "generator/booking_dataset.hpp"
|
||||
#include "generator/kayak_dataset.hpp"
|
||||
#include "generator/coastlines_generator.hpp"
|
||||
#include "generator/feature_builder.hpp"
|
||||
#include "generator/final_processor_utils.hpp"
|
||||
@@ -12,6 +14,8 @@
|
||||
#include "generator/osm2type.hpp"
|
||||
#include "generator/region_meta.hpp"
|
||||
|
||||
#include "generator/sponsored_dataset_inl.hpp"
|
||||
|
||||
#include "routing/speed_camera_prohibition.hpp"
|
||||
|
||||
#include "indexer/classificator.hpp"
|
||||
@@ -50,6 +54,9 @@ void CountryFinalProcessor::Process()
|
||||
if (!m_coastlineGeomFilename.empty())
|
||||
ProcessCoastline();
|
||||
|
||||
if (!m_hotelsFilename.empty())
|
||||
ProcessBooking();
|
||||
|
||||
// 1. Process roundabouts and addr:interpolation first.
|
||||
if (!m_miniRoundaboutsFilename.empty() || !m_addrInterpolFilename.empty())
|
||||
ProcessRoundabouts();
|
||||
@@ -89,6 +96,69 @@ void CountryFinalProcessor::Order()
|
||||
}
|
||||
*/
|
||||
|
||||
void CountryFinalProcessor::ProcessBooking()
|
||||
{
|
||||
KayakDataset dataset(m_hotelsFilename, m_hotelPlacesFileName);
|
||||
LOG(LINFO, ("Loaded", dataset.GetStorage().Size(), "hotels from", m_hotelsFilename));
|
||||
|
||||
std::ofstream matchingLogStream;
|
||||
matchingLogStream.exceptions(std::fstream::failbit | std::fstream::badbit);
|
||||
matchingLogStream.open(m_hotelsStatusFilename);
|
||||
|
||||
std::mutex m;
|
||||
ForEachMwmTmp(m_temporaryMwmPath, [&](auto const & name, auto const & path)
|
||||
{
|
||||
if (!IsCountry(name))
|
||||
return;
|
||||
|
||||
std::stringstream sstream;
|
||||
sstream << std::fixed << std::setprecision(7);
|
||||
|
||||
size_t total = 0, matched = 0;
|
||||
|
||||
FeatureBuilderWriter<serialization_policy::MaxAccuracy> writer(path, true /* mangleName */);
|
||||
ForEachFeatureRawFormat<serialization_policy::MaxAccuracy>(path, [&](FeatureBuilder && fb, uint64_t)
|
||||
{
|
||||
bool hotelProcessed = false;
|
||||
if (dataset.IsSponsoredCandidate(fb))
|
||||
{
|
||||
++total;
|
||||
auto const id = dataset.FindMatchingObjectId(fb);
|
||||
if (id != KayakHotel::InvalidObjectId())
|
||||
{
|
||||
++matched;
|
||||
hotelProcessed = true;
|
||||
|
||||
dataset.PreprocessMatchedOsmObject(id, fb, [&](FeatureBuilder & newFeature)
|
||||
{
|
||||
if (newFeature.PreSerialize())
|
||||
writer.Write(newFeature);
|
||||
});
|
||||
|
||||
sstream << id;
|
||||
}
|
||||
else
|
||||
sstream << "NO";
|
||||
|
||||
auto const ll = mercator::ToLatLon(fb.GetKeyPoint());
|
||||
sstream << ",\t" << DebugPrint(fb.GetMostGenericOsmId()) << ",\t" << ll.m_lat << ',' << ll.m_lon << std::endl;
|
||||
}
|
||||
|
||||
if (!hotelProcessed)
|
||||
writer.Write(fb);
|
||||
});
|
||||
|
||||
std::lock_guard guard(m);
|
||||
matchingLogStream << sstream.str();
|
||||
LOG(LINFO, ("Hotels (MWM, total, matched):", name, total, matched));
|
||||
|
||||
}, m_threadsCount);
|
||||
|
||||
std::vector<FeatureBuilder> fbs;
|
||||
dataset.BuildOsmObjects([&](auto && fb) { fbs.emplace_back(std::move(fb)); });
|
||||
AppendToMwmTmp(fbs, *m_affiliations, m_temporaryMwmPath, m_threadsCount);
|
||||
}
|
||||
|
||||
void CountryFinalProcessor::ProcessRoundabouts()
|
||||
{
|
||||
auto const roundabouts = ReadMiniRoundabouts(m_miniRoundaboutsFilename);
|
||||
|
||||
@@ -41,6 +41,13 @@ public:
|
||||
m_addressPath = dir;
|
||||
}
|
||||
|
||||
void SetHotels(std::string const & hotelsFile, std::string const & hotelPlacesFile, std::string const & statusFile)
|
||||
{
|
||||
m_hotelsFilename = hotelsFile;
|
||||
m_hotelPlacesFileName = hotelPlacesFile;
|
||||
m_hotelsStatusFilename = statusFile;
|
||||
}
|
||||
|
||||
void SetCityBoundariesFiles(std::string const & collectorFile)
|
||||
{
|
||||
m_boundariesCollectorFile = collectorFile;
|
||||
@@ -54,6 +61,7 @@ public:
|
||||
private:
|
||||
//void Order();
|
||||
void ProcessCoastline();
|
||||
void ProcessBooking();
|
||||
void ProcessRoundabouts();
|
||||
void AddFakeNodes();
|
||||
void AddIsolines();
|
||||
@@ -63,12 +71,12 @@ private:
|
||||
|
||||
bool IsCountry(std::string const & filename);
|
||||
|
||||
std::string m_borderPath;
|
||||
std::string m_temporaryMwmPath;
|
||||
std::string m_intermediateDir;
|
||||
std::string m_isolinesPath, m_addressPath;
|
||||
std::string m_boundariesCollectorFile;
|
||||
std::string m_coastlineGeomFilename;
|
||||
std::string m_hotelsFilename, m_hotelPlacesFileName, m_hotelsStatusFilename;
|
||||
std::string m_worldCoastsFilename;
|
||||
std::string m_fakeNodesFilename;
|
||||
std::string m_miniRoundaboutsFilename;
|
||||
|
||||
@@ -43,6 +43,9 @@ std::vector<std::vector<std::string>> AppendToMwmTmp(std::vector<feature::Featur
|
||||
feature::AffiliationInterface const & affiliation,
|
||||
std::string const & temporaryMwmPath, size_t threadsCount = 1)
|
||||
{
|
||||
if (fbs.empty())
|
||||
return {};
|
||||
|
||||
auto affiliations = GetAffiliations(fbs, affiliation, threadsCount);
|
||||
std::unordered_map<std::string, std::vector<size_t>> countryToFbsIndexes;
|
||||
for (size_t i = 0; i < fbs.size(); ++i)
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include "generator/cities_boundaries_builder.hpp"
|
||||
|
||||
#include "base/file_name_utils.hpp"
|
||||
#include "base/logging.hpp"
|
||||
|
||||
@@ -49,6 +47,7 @@ struct GenerateInfo
|
||||
OsmSourceType m_osmFileType = OsmSourceType::XML;
|
||||
std::string m_osmFileName;
|
||||
|
||||
std::string m_hotelsPath;
|
||||
std::string m_brandsFilename;
|
||||
std::string m_brandsTranslationsFilename;
|
||||
|
||||
|
||||
@@ -46,6 +46,7 @@ set(SRC
|
||||
source_data.hpp
|
||||
source_to_element_test.cpp
|
||||
speed_cameras_test.cpp
|
||||
sponsored_scoring_tests.cpp
|
||||
srtm_parser_test.cpp
|
||||
tag_admixer_test.cpp
|
||||
tesselator_test.cpp
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include "generator/geometry_holder.hpp"
|
||||
|
||||
#include "indexer/data_header.hpp"
|
||||
#include "indexer/custom_keyvalue.hpp"
|
||||
#include "indexer/feature_visibility.hpp"
|
||||
#include "indexer/ftypes_matcher.hpp"
|
||||
|
||||
@@ -397,4 +398,22 @@ UNIT_CLASS_TEST(TestWithClassificator, FBuilder_RemoveInconsistentTypes)
|
||||
TEST(!params.IsTypeExist(classif().GetTypeByPath({"hwtag", "nobicycle"})), ());
|
||||
}
|
||||
|
||||
// Checks which metadata fields FeatureBuilder::SetHotelInfo fills for a Kayak hotel.
UNIT_CLASS_TEST(TestWithClassificator, FBuilder_Hotel)
{
  FeatureBuilder fb;
  auto const & meta = fb.GetMetadata();

  // Reads the SRC_KAYAK entry of the custom key-value stored in metadata field |field|.
  auto const isEqual = [&meta, src = Metadata::SRC_KAYAK](Metadata::EType field, uint64_t expected)
  {
    indexer::CustomKeyValue const kv(meta.Get(field));
    return kv.Get(src) == expected;
  };

  fb.SetHotelInfo("Ritz,-c666-h777", 6.3);

  //TEST(isEqual(Metadata::FMD_CUSTOM_IDS, 777), ());
  TEST(isEqual(Metadata::FMD_RATINGS, 63), ());
  //TEST(isEqual(Metadata::FMD_PRICE_RATES, 4), ());

  TEST_EQUAL(meta.Get(Metadata::FMD_EXTERNAL_URI), "Ritz,-c666-h777", ());
}
|
||||
} // namespace feature_builder_test
|
||||
|
||||
41
generator/generator_tests/sponsored_scoring_tests.cpp
Normal file
41
generator/generator_tests/sponsored_scoring_tests.cpp
Normal file
@@ -0,0 +1,41 @@
|
||||
#include "testing/testing.hpp"
|
||||
|
||||
#include "generator/sponsored_scoring.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
#include "geometry/latlon.hpp"
|
||||
|
||||
namespace sponsored_scoring_tests
|
||||
{
|
||||
|
||||
/// Builds match statistics for an OSM object and a dataset hotel given by position and name.
generator::sponsored::MatchStats GetMatch(ms::LatLon osmLL, std::string const & osmName,
                                          ms::LatLon hotelLL, std::string const & hotelName)
{
  // The same as SponsoredDataset::kDistanceLimitMeters
  double constexpr kDistanceLimitMeters = 150.0;
  double const distance = ms::DistanceOnEarth(osmLL, hotelLL);
  return { distance, kDistanceLimitMeters, hotelName, osmName };
}
|
||||
|
||||
// Real-world samples from Paris: pairs of (OSM hotel, dataset hotel) with the expected verdict.
UNIT_TEST(SponsoredScoring_Paris)
{
  // Dataset hotel that is close to two differently named OSM hotels.
  ms::LatLon const riesnerLL{48.8473730, 2.3712020};
  std::string const riesnerName = "Hotel Riesner";

  TEST(!GetMatch({48.8474633, 2.3712106}, "Hôtel de Marseille", riesnerLL, riesnerName).IsMatched(), ());

  TEST(GetMatch({48.8760697, 2.3456749}, "Holiday Villa",
                {48.8761570, 2.3455750}, "Hotel Villa Lafayette Paris IX").IsMatched(), ());

  TEST(GetMatch({48.8664199, 2.2892440}, "Hôtel Baltimore",
                {48.8663780, 2.2895710}, "Sofitel Paris Baltimore Tour Eiffel").IsMatched(), ());

  TEST(!GetMatch({48.8808205, 2.3517253}, "Grand Hotel Magenta",
                 {48.8806950, 2.3521320}, "Hotel Cambrai").IsMatched(), ());

  // But may be false on the ground.
  TEST(GetMatch({48.8733283, 2.3004615}, "Hôtel Balzac",
                {48.8735222, 2.3004904}, "Apart Inn Paris - Balzac").IsMatched(), ());

  TEST(!GetMatch({48.8470895, 2.3710844}, "Hôtel Mignon", riesnerLL, riesnerName).IsMatched(), ());
}
|
||||
|
||||
} // namespace sponsored_scoring_tests
|
||||
@@ -1,11 +1,12 @@
|
||||
#include "test_generator.hpp"
|
||||
|
||||
#include "generator/borders.hpp"
|
||||
#include "generator/camera_info_collector.hpp"
|
||||
#include "generator/feature_sorter.hpp"
|
||||
#include "generator/osm_source.hpp"
|
||||
#include "generator/raw_generator.hpp"
|
||||
|
||||
#include "generator/camera_info_collector.hpp"
|
||||
#include "generator/cities_boundaries_builder.hpp"
|
||||
#include "generator/maxspeeds_builder.hpp"
|
||||
#include "generator/restriction_generator.hpp"
|
||||
#include "generator/road_access_generator.hpp"
|
||||
|
||||
@@ -139,6 +139,7 @@ DEFINE_bool(
|
||||
DEFINE_bool(generate_maxspeed, false, "Generate section with maxspeed of road features.");
|
||||
|
||||
// Sponsored-related.
|
||||
DEFINE_string(hotels_path, "", "Path to the folder with hotels.csv and placefeed.csv files");
|
||||
DEFINE_string(complex_hierarchy_data, "", "Path to complex hierarchy in csv format.");
|
||||
|
||||
DEFINE_string(wikipedia_pages, "", "Input dir with wikipedia pages.");
|
||||
@@ -240,6 +241,7 @@ MAIN_WITH_ERROR_HANDLING([](int argc, char ** argv)
|
||||
genInfo.m_osmFileName = FLAGS_osm_file_name;
|
||||
genInfo.m_failOnCoasts = FLAGS_fail_on_coasts;
|
||||
genInfo.m_preloadCache = FLAGS_preload_cache;
|
||||
genInfo.m_hotelsPath = FLAGS_hotels_path;
|
||||
genInfo.m_popularPlacesFilename = FLAGS_popular_places_data;
|
||||
genInfo.m_brandsFilename = FLAGS_brands_data;
|
||||
genInfo.m_brandsTranslationsFilename = FLAGS_brands_translations_data;
|
||||
|
||||
124
generator/kayak_dataset.cpp
Normal file
124
generator/kayak_dataset.cpp
Normal file
@@ -0,0 +1,124 @@
|
||||
#include "generator/kayak_dataset.hpp"
|
||||
|
||||
#include "generator/feature_builder.hpp"
|
||||
#include "generator/sponsored_dataset_inl.hpp"
|
||||
|
||||
#include "indexer/ftypes_matcher.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
|
||||
namespace generator
|
||||
{
|
||||
using namespace feature;
|
||||
|
||||
// KayakHotel ------------------------------------------------------------------------------------
|
||||
/// Parses one row of the Kayak hotels CSV.
/// The object stays invalid (m_id == InvalidObjectId()) when the row is the header, has an
/// unexpected number of fields, or its id / coordinates cannot be parsed.
KayakHotel::KayakHotel(std::string src)
{
  using namespace strings;

  // Patch strange entries.
  if (src.starts_with("\","))
    src.erase(0, 1);

  /// @todo For fast parsing we can preprocess src (quotes) and return string_view's.
  std::vector<std::string> rec;
  ParseCSVRow(src, ',', rec);

  // Skip bad entries and header.
  if (rec.size() != Fields::Counter || rec[0] == "ChainID")
    return;

  // Conversion results are kept so that a broken row never yields a "valid" hotel.
  // (CLOG presumably reports the entry when its condition is false.)
  uint32_t id = 0;
  bool const isIdOk = to_uint(rec[Fields::KayakHotelID], id);
  CLOG(LDEBUG, isIdOk, (rec[Fields::KayakHotelID]));

  // A missing place is tolerated here; PlaceID is only used to build the hotel URI later.
  CLOG(LDEBUG, to_uint(rec[Fields::PlaceID], m_placeID), ());

  bool const isLatOk = to_double(rec[Fields::Latitude], m_latLon.m_lat);
  CLOG(LDEBUG, isLatOk, (rec[Fields::Latitude]));
  bool const isLonOk = to_double(rec[Fields::Longitude], m_latLon.m_lon);
  CLOG(LDEBUG, isLonOk, (rec[Fields::Longitude]));

  if (!to_double(rec[Fields::OverallRating], m_overallRating))
    m_overallRating = kInvalidRating;

  m_name = rec[Fields::HotelName];
  m_address = rec[Fields::HotelAddress];

  // Assign id in the end, and only if the mandatory fields were parsed: previously an
  // uninitialized |id| could be stored and hotels with garbage coordinates were accepted.
  if (isIdOk && isLatOk && isLonOk)
    m_id.Set(id);
}
|
||||
|
||||
// KayakPlace ----------------------------------------------------------------------------------
|
||||
/// Parses one row of the Kayak places CSV; |m_good| is set only when both ids were parsed.
KayakPlace::KayakPlace(std::string src)
{
  using namespace strings;

  std::vector<std::string> fields;
  ParseCSVRow(src, ',', fields);

  // Malformed row.
  if (fields.size() != Fields::Counter)
    return;
  // Header row.
  if (fields[0] == "CountryCode")
    return;

  // The city id is parsed only after the place id was parsed successfully.
  if (!to_uint(fields[Fields::PlaceID], m_placeID))
    return;
  m_good = to_uint(fields[Fields::KayakCityID], m_kayakCityID);
}
|
||||
|
||||
std::string DebugPrint(KayakPlace const & p)
|
||||
{
|
||||
return std::to_string(p.m_placeID) + "; " + std::to_string(p.m_kayakCityID);
|
||||
}
|
||||
|
||||
// KayakDataset ----------------------------------------------------------------------------------
|
||||
/// Loads hotels from |hotelsPath| (via the base dataset) and the PlaceID -> KayakCityID
/// mapping from |placesPath|.
KayakDataset::KayakDataset(std::string const & hotelsPath, std::string const & placesPath)
  : BaseDatasetT(hotelsPath)
{
  std::ifstream placesStream(placesPath);
  if (!placesStream)
  {
    LOG(LERROR, ("Error while opening", placesPath, ":", strerror(errno)));
    return;
  }

  std::string line;
  while (std::getline(placesStream, line))
  {
    KayakPlace place(std::move(line));
    // Bring the moved-from string back to a known state before the next read.
    line.clear();

    if (!place.m_good)
      continue;

    // The first occurrence of a PlaceID wins; a failed insertion is passed to CLOG.
    CLOG(LDEBUG, m_place2kayak.emplace(place.m_placeID, place.m_kayakCityID).second, (place));
  }
}
|
||||
|
||||
template <>
|
||||
bool BaseDatasetT::IsSponsoredCandidate(FeatureBuilder const & fb) const
|
||||
{
|
||||
if (fb.GetName(StringUtf8Multilang::kDefaultCode).empty())
|
||||
return false;
|
||||
|
||||
return ftypes::IsHotelChecker::Instance()(fb.GetTypes());
|
||||
}
|
||||
|
||||
template <>
|
||||
void BaseDatasetT::PreprocessMatchedOsmObject(ObjectId id, FeatureBuilder & fb, FBuilderFnT const fn) const
|
||||
{
|
||||
auto const & hotel = m_storage.GetObjectById(id);
|
||||
|
||||
// Only hack like this ..
|
||||
KayakDataset const & kds = static_cast<KayakDataset const &>(*this);
|
||||
uint32_t const cityID = kds.GetKayakCityID(hotel.m_placeID);
|
||||
if (cityID)
|
||||
{
|
||||
std::string uri = hotel.m_name + ",-c" + std::to_string(cityID) + "-h" + std::to_string(hotel.m_id.Get());
|
||||
fb.SetHotelInfo(std::move(uri), hotel.m_overallRating);
|
||||
}
|
||||
else
|
||||
LOG(LWARNING, ("Unknown PlaceID", hotel.m_placeID));
|
||||
|
||||
fn(fb);
|
||||
}
|
||||
|
||||
template <>
|
||||
void BaseDatasetT::BuildObject(Object const &, FBuilderFnT const &) const
|
||||
{
|
||||
// Don't create new objects.
|
||||
}
|
||||
|
||||
} // namespace generator
|
||||
115
generator/kayak_dataset.hpp
Normal file
115
generator/kayak_dataset.hpp
Normal file
@@ -0,0 +1,115 @@
|
||||
#pragma once
|
||||
|
||||
#include "generator/sponsored_dataset.hpp"
|
||||
#include "generator/sponsored_object_base.hpp"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace generator
|
||||
{
|
||||
class KayakHotel : public SponsoredObjectBase
|
||||
{
|
||||
enum Fields
|
||||
{
|
||||
ChainID = 0,
|
||||
ChainName,
|
||||
Checkin,
|
||||
Checkout,
|
||||
CountryCode,
|
||||
CountryFileName,
|
||||
CountryName,
|
||||
CurrencyCode,
|
||||
DateCreated,
|
||||
Facilities,
|
||||
HotelAddress,
|
||||
HotelFileName,
|
||||
HotelID,
|
||||
HotelName,
|
||||
HotelPostcode,
|
||||
IataPlaceCode,
|
||||
ImageID,
|
||||
KayakHotelID,
|
||||
LastUpdated,
|
||||
Latitude,
|
||||
Longitude,
|
||||
MinRate,
|
||||
OverallRating,
|
||||
PlaceFileName,
|
||||
PlaceID,
|
||||
PlaceName,
|
||||
PlaceType,
|
||||
Popularity,
|
||||
PropertyType,
|
||||
PropertyTypeID,
|
||||
SelfRated,
|
||||
StarRating,
|
||||
StateName,
|
||||
StatePlaceID,
|
||||
StatePlacefilename,
|
||||
Themes,
|
||||
Trademarked,
|
||||
TransliteratedHotelName,
|
||||
|
||||
Counter
|
||||
};
|
||||
|
||||
public:
|
||||
explicit KayakHotel(std::string src);
|
||||
|
||||
static constexpr size_t FieldsCount() { return Fields::Counter; }
|
||||
|
||||
static double constexpr kInvalidRating = 0;
|
||||
double m_overallRating = kInvalidRating;
|
||||
uint32_t m_placeID = 0;
|
||||
};
|
||||
|
||||
/// One record of the Kayak places CSV: maps a PlaceID to a Kayak city id.
class KayakPlace
{
  // Column indices of the places CSV; the order must match the file layout.
  enum Fields
  {
    CountryCode = 0,
    CountryFileName,
    CountryName,
    HasHotels,
    HasImage,
    Hierarchy,
    IataCode,
    KayakCityID,
    KayakPlaceID,
    Latitude,
    Longitude,
    NumberOfHotels,
    PlaceFileName,
    PlaceID,
    PlaceName,
    PlaceType,
    Searchable,

    Counter
  };

public:
  /// Parses a single CSV row; check |m_good| before using the ids.
  explicit KayakPlace(std::string src);

  friend std::string DebugPrint(KayakPlace const & p);

  // Zero-initialized so that a rejected row never exposes indeterminate values
  // (e.g. through DebugPrint).
  uint32_t m_placeID = 0;
  uint32_t m_kayakCityID = 0;
  // True only when both ids above were parsed.
  bool m_good = false;
};
|
||||
|
||||
using BaseDatasetT = SponsoredDataset<KayakHotel>;
|
||||
class KayakDataset : public BaseDatasetT
|
||||
{
|
||||
std::unordered_map<uint32_t, uint32_t> m_place2kayak;
|
||||
|
||||
public:
|
||||
KayakDataset(std::string const & hotelsPath, std::string const & placesPath);
|
||||
|
||||
uint32_t GetKayakCityID(uint32_t placeID) const
|
||||
{
|
||||
auto it = m_place2kayak.find(placeID);
|
||||
return it != m_place2kayak.end() ? it->second : 0;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace generator
|
||||
@@ -1,46 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "generator/feature_builder.hpp"
|
||||
#include "generator/feature_generator.hpp"
|
||||
#include "generator/processor_interface.hpp"
|
||||
|
||||
#include "indexer/feature_data.hpp"
|
||||
|
||||
#include "base/assert.hpp"
|
||||
#include "base/geo_object_id.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
||||
namespace generator
|
||||
{
|
||||
template <typename Dataset>
|
||||
class ProcessorBooking : public FeatureProcessorInterface
|
||||
{
|
||||
public:
|
||||
ProcessorBooking(Dataset const & dataset,
|
||||
std::map<base::GeoObjectId, feature::FeatureBuilder> & features)
|
||||
: m_dataset(dataset), m_features(features)
|
||||
{
|
||||
}
|
||||
|
||||
// FeatureProcessorInterface overrides:
|
||||
std::shared_ptr<FeatureProcessorInterface> Clone() const override
|
||||
{
|
||||
CHECK(false, ());
|
||||
return {};
|
||||
}
|
||||
|
||||
void Process(feature::FeatureBuilder & fb) override
|
||||
{
|
||||
if (m_dataset.NecessaryMatchingConditionHolds(fb))
|
||||
m_features.emplace(fb.GetMostGenericOsmId(), fb);
|
||||
}
|
||||
|
||||
void Finish() override {}
|
||||
|
||||
private:
|
||||
Dataset const & m_dataset;
|
||||
std::map<base::GeoObjectId, feature::FeatureBuilder> & m_features;
|
||||
};
|
||||
} // namespace generator
|
||||
@@ -1,7 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "generator/factory_utils.hpp"
|
||||
#include "generator/processor_booking.hpp"
|
||||
#include "generator/processor_coastline.hpp"
|
||||
//#include "generator/processor_complex.hpp"
|
||||
#include "generator/processor_country.hpp"
|
||||
|
||||
@@ -141,6 +141,7 @@ void RawGenerator::GenerateCoasts()
|
||||
|
||||
void RawGenerator::GenerateCustom(std::shared_ptr<TranslatorInterface> const & translator)
|
||||
{
|
||||
CHECK(translator, ());
|
||||
m_translators->Append(translator);
|
||||
}
|
||||
|
||||
@@ -148,6 +149,7 @@ void RawGenerator::GenerateCustom(
|
||||
std::shared_ptr<TranslatorInterface> const & translator,
|
||||
std::shared_ptr<FinalProcessorIntermediateMwmInterface> const & finalProcessor)
|
||||
{
|
||||
CHECK(translator && finalProcessor, ());
|
||||
m_translators->Append(translator);
|
||||
m_finalProcessors.emplace(finalProcessor);
|
||||
}
|
||||
@@ -189,6 +191,14 @@ RawGenerator::FinalProcessorPtr RawGenerator::CreateCountryFinalProcessor(
|
||||
auto finalProcessor = std::make_shared<CountryFinalProcessor>(affiliations, m_genInfo.m_tmpDir, m_threadsCount);
|
||||
finalProcessor->SetIsolinesDir(m_genInfo.m_isolinesDir);
|
||||
finalProcessor->SetAddressesDir(m_genInfo.m_addressesDir);
|
||||
|
||||
if (!m_genInfo.m_hotelsPath.empty())
|
||||
{
|
||||
finalProcessor->SetHotels(base::JoinPath(m_genInfo.m_hotelsPath, "hotels.csv"),
|
||||
base::JoinPath(m_genInfo.m_hotelsPath, "placefeed.csv"),
|
||||
m_genInfo.GetIntermediateFileName("hotels_status.csv"));
|
||||
}
|
||||
|
||||
finalProcessor->SetMiniRoundabouts(m_genInfo.GetIntermediateFileName(MINI_ROUNDABOUTS_FILENAME));
|
||||
finalProcessor->SetAddrInterpolation(m_genInfo.GetIntermediateFileName(ADDR_INTERPOL_FILENAME));
|
||||
if (addAds)
|
||||
|
||||
51
generator/sponsored_dataset.hpp
Normal file
51
generator/sponsored_dataset.hpp
Normal file
@@ -0,0 +1,51 @@
|
||||
#pragma once
|
||||
|
||||
#include "generator/sponsored_object_storage.hpp"
|
||||
#include "generator/sponsored_scoring.hpp"
|
||||
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
namespace feature { class FeatureBuilder; }
|
||||
|
||||
namespace generator
|
||||
{
|
||||
|
||||
template<typename SponsoredObject>
|
||||
class SponsoredDataset
|
||||
{
|
||||
public:
|
||||
using Object = SponsoredObject;
|
||||
using ObjectId = typename Object::ObjectId;
|
||||
|
||||
static double constexpr kDistanceLimitMeters = 150;
|
||||
static size_t constexpr kMaxSelectedElements = 3;
|
||||
|
||||
explicit SponsoredDataset(std::string const & dataPath);
|
||||
|
||||
/// @return true if |fb| satisfies some necessary conditions to match one or serveral objects from dataset.
|
||||
bool IsSponsoredCandidate(feature::FeatureBuilder const & fb) const;
|
||||
ObjectId FindMatchingObjectId(feature::FeatureBuilder const & fb) const;
|
||||
|
||||
using FBuilderFnT = std::function<void(feature::FeatureBuilder &)>;
|
||||
// Applies changes to a given osm object (for example, remove hotel type)
|
||||
// and passes the result to |fn|.
|
||||
void PreprocessMatchedOsmObject(ObjectId matchedObjId, feature::FeatureBuilder & fb, FBuilderFnT const fn) const;
|
||||
// Creates objects and adds them to the map (MWM) via |fn|.
|
||||
void BuildOsmObjects(FBuilderFnT const & fn) const;
|
||||
|
||||
static sponsored::MatchStats CalcScore(Object const & obj, feature::FeatureBuilder const & fb);
|
||||
sponsored::MatchStats CalcScore(ObjectId objId, feature::FeatureBuilder const & fb) const
|
||||
{
|
||||
return CalcScore(m_storage.GetObjectById(objId), fb);
|
||||
}
|
||||
|
||||
SponsoredObjectStorage<Object> const & GetStorage() const { return m_storage; }
|
||||
|
||||
private:
|
||||
void BuildObject(Object const & object, FBuilderFnT const & fn) const;
|
||||
|
||||
SponsoredObjectStorage<Object> m_storage;
|
||||
};
|
||||
|
||||
} // namespace generator
|
||||
72
generator/sponsored_dataset_inl.hpp
Normal file
72
generator/sponsored_dataset_inl.hpp
Normal file
@@ -0,0 +1,72 @@
|
||||
#pragma once
|
||||
|
||||
#include "generator/feature_builder.hpp"
|
||||
#include "generator/sponsored_dataset.hpp"
|
||||
|
||||
#include "geometry/mercator.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
|
||||
namespace generator
|
||||
{
|
||||
|
||||
// SponsoredDataset --------------------------------------------------------------------------------
|
||||
template <typename SponsoredObject>
|
||||
SponsoredDataset<SponsoredObject>::SponsoredDataset(std::string const & dataPath)
|
||||
: m_storage(kDistanceLimitMeters, kMaxSelectedElements)
|
||||
{
|
||||
m_storage.LoadData(dataPath);
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
void SponsoredDataset<SponsoredObject>::BuildOsmObjects(FBuilderFnT const & fn) const
|
||||
{
|
||||
for (auto const & item : m_storage.GetObjects())
|
||||
BuildObject(item.second, fn);
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
sponsored::MatchStats SponsoredDataset<SponsoredObject>::CalcScore(
|
||||
Object const & obj, feature::FeatureBuilder const & fb)
|
||||
{
|
||||
auto const fbCenter = mercator::ToLatLon(fb.GetKeyPoint());
|
||||
auto const distance = ms::DistanceOnEarth(fbCenter, obj.m_latLon);
|
||||
|
||||
/// @todo Input dataset is in English language.
|
||||
auto name = fb.GetName(StringUtf8Multilang::kEnglishCode);
|
||||
if (name.empty())
|
||||
name = fb.GetName(StringUtf8Multilang::kDefaultCode);
|
||||
|
||||
return { distance, kDistanceLimitMeters, obj.m_name, std::string(name) };
|
||||
}
|
||||
|
||||
template <typename SponsoredObject>
|
||||
typename SponsoredDataset<SponsoredObject>::ObjectId
|
||||
SponsoredDataset<SponsoredObject>::FindMatchingObjectId(feature::FeatureBuilder const & fb) const
|
||||
{
|
||||
// Find |kMaxSelectedElements| nearest values to a point, sorted by distance?
|
||||
auto const indices = m_storage.GetNearestObjects(mercator::ToLatLon(fb.GetKeyPoint()));
|
||||
|
||||
// Select best candidate by score.
|
||||
double bestScore = -1;
|
||||
auto res = Object::InvalidObjectId();
|
||||
for (auto const i : indices)
|
||||
{
|
||||
auto const r = CalcScore(i, fb);
|
||||
if (r.IsMatched())
|
||||
{
|
||||
double const score = r.GetMatchingScore();
|
||||
if (score > bestScore)
|
||||
{
|
||||
bestScore = score;
|
||||
res = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
} // namespace generator
|
||||
45
generator/sponsored_object_base.hpp
Normal file
45
generator/sponsored_object_base.hpp
Normal file
@@ -0,0 +1,45 @@
|
||||
#pragma once
|
||||
|
||||
#include "geometry/latlon.hpp"
|
||||
|
||||
#include "base/newtype.hpp"
|
||||
|
||||
#include <iomanip>
|
||||
#include <limits>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
|
||||
namespace generator
|
||||
{
|
||||
struct SponsoredObjectBase
|
||||
{
|
||||
NEWTYPE(uint32_t, ObjectId);
|
||||
|
||||
static constexpr ObjectId InvalidObjectId()
|
||||
{
|
||||
return ObjectId(std::numeric_limits<typename ObjectId::RepType>::max());
|
||||
}
|
||||
|
||||
virtual ~SponsoredObjectBase() = default;
|
||||
|
||||
bool HasAddresParts() const { return !m_street.empty() || !m_houseNumber.empty(); }
|
||||
|
||||
ObjectId m_id{InvalidObjectId()};
|
||||
ms::LatLon m_latLon = ms::LatLon::Zero();
|
||||
std::string m_name;
|
||||
|
||||
std::string m_street;
|
||||
std::string m_houseNumber;
|
||||
std::string m_address;
|
||||
};
|
||||
|
||||
NEWTYPE_SIMPLE_OUTPUT(SponsoredObjectBase::ObjectId);
|
||||
|
||||
inline std::ostream & operator<<(std::ostream & s, SponsoredObjectBase const & h)
|
||||
{
|
||||
s << std::fixed << std::setprecision(7);
|
||||
s << "Id: " << h.m_id << "\t Name: " << h.m_name << "\t Address: " << h.m_address
|
||||
<< "\t lat: " << h.m_latLon.m_lat << " lon: " << h.m_latLon.m_lon;
|
||||
return s;
|
||||
}
|
||||
} // namespace generator
|
||||
178
generator/sponsored_object_storage.hpp
Normal file
178
generator/sponsored_object_storage.hpp
Normal file
@@ -0,0 +1,178 @@
|
||||
#pragma once
|
||||
|
||||
#include "platform/platform.hpp"
|
||||
|
||||
#include "geometry/distance_on_sphere.hpp"
|
||||
#include "geometry/latlon.hpp"
|
||||
|
||||
#include "base/logging.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include "std/boost_geometry.hpp"
|
||||
#include <boost/geometry/index/rtree.hpp>
|
||||
|
||||
|
||||
namespace generator
|
||||
{
|
||||
template <typename Object>
|
||||
class SponsoredObjectStorage
|
||||
{
|
||||
public:
|
||||
using ObjectId = typename Object::ObjectId;
|
||||
using ObjectsContainer = std::map<ObjectId, Object>;
|
||||
using ExcludedIdsContainer = std::unordered_set<ObjectId, typename ObjectId::Hash>;
|
||||
|
||||
SponsoredObjectStorage(double distanceLimitMeters, size_t maxSelectedElements)
|
||||
: m_distanceLimitMeters(distanceLimitMeters)
|
||||
, m_maxSelectedElements(maxSelectedElements)
|
||||
{
|
||||
}
|
||||
|
||||
double GetDistanceLimitInMeters() const
|
||||
{
|
||||
return m_distanceLimitMeters;
|
||||
}
|
||||
|
||||
size_t GetMaxSelectedElements() const
|
||||
{
|
||||
return m_maxSelectedElements;
|
||||
}
|
||||
|
||||
ObjectsContainer const & GetObjects() const
|
||||
{
|
||||
return m_objects;
|
||||
}
|
||||
|
||||
size_t Size() const
|
||||
{
|
||||
return m_objects.size();
|
||||
}
|
||||
|
||||
void LoadData(std::string const & dataPath)
|
||||
{
|
||||
if (dataPath.empty())
|
||||
return;
|
||||
|
||||
std::ifstream dataSource(dataPath);
|
||||
if (!dataSource)
|
||||
{
|
||||
LOG(LERROR, ("Error while opening", dataPath, ":", strerror(errno)));
|
||||
return;
|
||||
}
|
||||
|
||||
LoadData(dataSource, LoadExcludedIds({})); // empty exclude path
|
||||
}
|
||||
|
||||
ExcludedIdsContainer LoadExcludedIds(std::string const & excludedIdsPath)
|
||||
{
|
||||
if (excludedIdsPath.empty())
|
||||
return {};
|
||||
|
||||
std::ifstream source(excludedIdsPath);
|
||||
if (!source)
|
||||
{
|
||||
LOG(LERROR, ("Error while opening", excludedIdsPath, ":", strerror(errno)));
|
||||
return {};
|
||||
}
|
||||
|
||||
ExcludedIdsContainer result;
|
||||
for (std::string line; std::getline(source, line);)
|
||||
{
|
||||
ObjectId id{Object::InvalidObjectId()};
|
||||
|
||||
if (!strings::to_any(line, id.Get()))
|
||||
{
|
||||
LOG(LWARNING, ("Incorrect excluded sponsored id:", line));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (id != Object::InvalidObjectId())
|
||||
result.emplace(id);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void LoadData(std::istream & src, ExcludedIdsContainer const & excludedIds)
|
||||
{
|
||||
m_objects.clear();
|
||||
m_rtree.clear();
|
||||
|
||||
for (std::string line; std::getline(src, line);)
|
||||
{
|
||||
Object object(std::move(line));
|
||||
line.clear();
|
||||
|
||||
if (object.m_id != Object::InvalidObjectId() &&
|
||||
excludedIds.find(object.m_id) == excludedIds.cend())
|
||||
{
|
||||
m_objects.emplace(object.m_id, object);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto const & item : m_objects)
|
||||
{
|
||||
auto const & object = item.second;
|
||||
Box b(Point(object.m_latLon.m_lat, object.m_latLon.m_lon),
|
||||
Point(object.m_latLon.m_lat, object.m_latLon.m_lon));
|
||||
m_rtree.insert(make_pair(b, object.m_id));
|
||||
}
|
||||
}
|
||||
|
||||
Object const & GetObjectById(ObjectId id) const
|
||||
{
|
||||
auto const it = m_objects.find(id);
|
||||
CHECK(it != end(m_objects), ("Got wrong object id:", id));
|
||||
return it->second;
|
||||
}
|
||||
|
||||
Object & GetObjectById(ObjectId id)
|
||||
{
|
||||
auto const it = m_objects.find(id);
|
||||
CHECK(it != end(m_objects), ("Got wrong object id:", id));
|
||||
return it->second;
|
||||
}
|
||||
|
||||
std::vector<ObjectId> GetNearestObjects(ms::LatLon const & latLon) const
|
||||
{
|
||||
namespace bgi = boost::geometry::index;
|
||||
|
||||
std::vector<ObjectId> indexes;
|
||||
for_each(bgi::qbegin(m_rtree, bgi::nearest(Point(latLon.m_lat, latLon.m_lon),
|
||||
static_cast<unsigned>(m_maxSelectedElements))),
|
||||
bgi::qend(m_rtree), [this, &latLon, &indexes](Value const & v)
|
||||
{
|
||||
auto const & object = GetObjectById(v.second);
|
||||
double const dist = ms::DistanceOnEarth(latLon, object.m_latLon);
|
||||
if (m_distanceLimitMeters != 0.0 && dist > m_distanceLimitMeters)
|
||||
return;
|
||||
|
||||
indexes.emplace_back(v.second);
|
||||
});
|
||||
|
||||
return indexes;
|
||||
}
|
||||
|
||||
private:
|
||||
// TODO(mgsergio): Get rid of Box since boost::rtree supports point as value type.
|
||||
// TODO(mgsergio): Use mercator instead of latlon or boost::geometry::cs::spherical_equatorial
|
||||
// instead of boost::geometry::cs::cartesian.
|
||||
using Point = boost::geometry::model::point<float, 2, boost::geometry::cs::cartesian>;
|
||||
using Box = boost::geometry::model::box<Point>;
|
||||
using Value = std::pair<Box, ObjectId>;
|
||||
|
||||
// Create the rtree using default constructor.
|
||||
boost::geometry::index::rtree<Value, boost::geometry::index::quadratic<16>> m_rtree;
|
||||
ObjectsContainer m_objects;
|
||||
|
||||
double const m_distanceLimitMeters;
|
||||
size_t const m_maxSelectedElements;
|
||||
};
|
||||
} // namespace generator
|
||||
146
generator/sponsored_scoring.cpp
Normal file
146
generator/sponsored_scoring.cpp
Normal file
@@ -0,0 +1,146 @@
|
||||
#include "generator/sponsored_scoring.hpp"
|
||||
|
||||
#include "search/ranking_utils.hpp"
|
||||
|
||||
#include "indexer/search_string_utils.hpp"
|
||||
|
||||
#include "base/math.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace generator
|
||||
{
|
||||
namespace sponsored
|
||||
{
|
||||
namespace
|
||||
{
|
||||
using StringT = strings::UniString;
|
||||
class SkipTokens
|
||||
{
|
||||
std::set<StringT> m_skip;
|
||||
public:
|
||||
SkipTokens()
|
||||
{
|
||||
/// @todo Add other common terms?
|
||||
m_skip.insert(strings::MakeUniString("hotel"));
|
||||
}
|
||||
bool Has(StringT const & s) const
|
||||
{
|
||||
return m_skip.count(s) > 0;
|
||||
}
|
||||
};
|
||||
|
||||
using WeightedBagOfWords = std::vector<std::pair<StringT, double>>;
|
||||
|
||||
std::vector<StringT> StringToWords(std::string const & str)
|
||||
{
|
||||
auto result = search::NormalizeAndTokenizeString(str);
|
||||
|
||||
static SkipTokens toSkip;
|
||||
auto it = std::remove_if(result.begin(), result.end(), [](StringT const & s)
|
||||
{
|
||||
return toSkip.Has(s) || search::IsStopWord(s);
|
||||
});
|
||||
|
||||
// In case if name is like "The Hotel".
|
||||
if (std::distance(result.begin(), it) > 0)
|
||||
result.erase(it, result.end());
|
||||
|
||||
std::sort(result.begin(), result.end());
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Collapses runs of equal adjacent words into (word, weight) pairs; every occurrence adds
/// the same placeholder weight.
WeightedBagOfWords MakeWeightedBagOfWords(std::vector<StringT> const & words)
{
  // TODO(mgsergio): Calculate tf-idf score for every word.
  auto constexpr kTfIdfScorePlaceholder = 1;

  WeightedBagOfWords bag;
  for (auto const & word : words)
  {
    // TODO(mgsergio): tf-idf score for the word.
    if (!bag.empty() && bag.back().first == word)
      bag.back().second += kTfIdfScorePlaceholder;
    else
      bag.emplace_back(word, kTfIdfScorePlaceholder);
  }
  return bag;
}
|
||||
|
||||
/// Dot product of two bags: the sum of weight products over words present in both.
/// Implemented as a single merge-style pass, which relies on both bags being ordered by word.
double WeightedBagsDotProduct(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
{
  double sum = 0;

  auto l = lhs.begin();
  auto r = rhs.begin();
  auto const lEnd = lhs.end();
  auto const rEnd = rhs.end();

  while (l != lEnd && r != rEnd)
  {
    auto const & lWord = l->first;
    auto const & rWord = r->first;

    if (!(lWord == rWord))
    {
      // No match at the current position: advance the side holding the smaller word.
      if (lWord < rWord)
        ++l;
      else
        ++r;
      continue;
    }

    sum += l->second * r->second;
    ++l;
    ++r;
  }

  return sum;
}
|
||||
|
||||
/// Cosine of the angle between two weighted bags of words.
/// @return 0.0 when the dot product of the bags is zero, otherwise
/// dot(lhs, rhs) / (|lhs| * |rhs|).
double WeightedBagOfWordsCos(WeightedBagOfWords const & lhs, WeightedBagOfWords const & rhs)
{
  auto const product = WeightedBagsDotProduct(lhs, rhs);

  // WeightedBagsDotProduct returns 0.0 if lhs.empty() || rhs.empty() or
  // if every element of either lhs or rhs is 0.0.
  // Checking this before the norms are computed skips two more passes over the bags and
  // keeps a possible 0 / 0 division out of the final expression.
  if (product == 0.0)
    return 0.0;

  auto const lhsLength = std::sqrt(WeightedBagsDotProduct(lhs, lhs));
  auto const rhsLength = std::sqrt(WeightedBagsDotProduct(rhs, rhs));

  return product / (lhsLength * rhsLength);
}
|
||||
|
||||
double GetLinearNormDistanceScore(double distance, double const maxDistance)
|
||||
{
|
||||
CHECK_NOT_EQUAL(maxDistance, 0.0, ("maxDistance cannot be 0."));
|
||||
distance = base::Clamp(distance, 0.0, maxDistance);
|
||||
return 1.0 - distance / maxDistance;
|
||||
}
|
||||
|
||||
double GetNameSimilarityScore(std::string const & booking_name, std::string const & osm_name)
|
||||
{
|
||||
auto const aws = MakeWeightedBagOfWords(StringToWords(booking_name));
|
||||
auto const bws = MakeWeightedBagOfWords(StringToWords(osm_name));
|
||||
|
||||
if (aws.empty() && bws.empty())
|
||||
return 1.0;
|
||||
if (aws.empty() || bws.empty())
|
||||
return 0.0;
|
||||
|
||||
return WeightedBagOfWordsCos(aws, bws);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
/// Stores |distM| and derives both scores: the distance score from |distM| against
/// |distLimitM|, and the name score from comparing |name| with |fbName|.
/// Members are initialized in declaration order, i.e. distance, distance score, name score.
MatchStats::MatchStats(double distM, double distLimitM, std::string const & name, std::string const & fbName)
  : m_distance(distM)
  , m_linearNormDistanceScore(GetLinearNormDistanceScore(distM, distLimitM))
  // TODO(mgsergio): Check all translations and use the best one.
  , m_nameSimilarityScore(GetNameSimilarityScore(name, fbName))
{
}
|
||||
|
||||
} // namespace sponsored
|
||||
} // namespace generator
|
||||
41
generator/sponsored_scoring.hpp
Normal file
41
generator/sponsored_scoring.hpp
Normal file
@@ -0,0 +1,41 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace generator
|
||||
{
|
||||
struct SponsoredObjectBase;
|
||||
|
||||
namespace sponsored
|
||||
{
|
||||
|
||||
/// Scoring statistics for matching a sponsored object against an OSM object.
class MatchStats
{
public:
  /// @param distM distance between the two objects, stored as-is in m_distance
  ///        (presumably meters, judging by the name).
  /// @param distLimitM distance at which the distance score drops to zero.
  /// @param name name of the sponsored object.
  /// @param fbName name of the object it is compared against.
  MatchStats(double distM, double distLimitM, std::string const & name, std::string const & fbName);

  /// @return some score based on given fields and classificator tuning.
  double GetMatchingScore() const
  {
    // TODO(mgsergio): Use tuner to get optimal function.
    auto const score = m_linearNormDistanceScore * m_nameSimilarityScore;
    return score;
  }

  /// @return true if GetMatchingScore is strictly greater than the threshold.
  bool IsMatched() const { return kOptimalThreshold < GetMatchingScore(); }

  double m_distance;
  double m_linearNormDistanceScore;
  double m_nameSimilarityScore;

private:
  // Calculated with tools/python/booking_hotels_quality.py.
  static double constexpr kOptimalThreshold = 0.304875;
};
|
||||
|
||||
} // namespace sponsored
|
||||
} // namespace generator
|
||||
@@ -98,6 +98,27 @@ std::unique_ptr<FeatureType> FeatureGetter::GetFeatureByIndex(uint32_t index) co
|
||||
return m_guard->GetFeatureByIndex(index);
|
||||
}
|
||||
|
||||
/// Finds the local maps in the platform's writable directory and registers each of them
/// in |dataSource|. A RootException thrown while registering a file is turned into a
/// failed CHECK that names the file.
void LoadDataSource(DataSource & dataSource)
{
  std::vector<platform::LocalCountryFile> mwms;
  platform::FindAllLocalMapsInDirectoryAndCleanup(GetPlatform().WritableDir(), 0 /* version */,
                                                  -1 /* latestVersion */, mwms);

  for (auto const & mwm : mwms)
  {
    LOG(LINFO, ("Found mwm:", mwm));
    try
    {
      dataSource.RegisterMap(mwm);
    }
    catch (RootException const & e)
    {
      CHECK(false, (e.Msg(), "Bad mwm file:", mwm));
    }
  }
}
|
||||
|
||||
bool ParseFeatureIdToOsmIdMapping(std::string const & path,
|
||||
std::unordered_map<uint32_t, base::GeoObjectId> & mapping)
|
||||
{
|
||||
|
||||
@@ -55,6 +55,8 @@ private:
|
||||
MwmSet::MwmId m_mwmId;
|
||||
};
|
||||
|
||||
void LoadDataSource(DataSource & dataSource);
|
||||
|
||||
class FeatureGetter
|
||||
{
|
||||
public:
|
||||
|
||||
@@ -281,10 +281,6 @@ class PathProvider:
|
||||
def ugc_path(self) -> AnyStr:
    """Return the path of ``ugc_db.sqlite3`` under the intermediate data directory."""
    file_name = "ugc_db.sqlite3"
    return os.path.join(self.intermediate_data_path, file_name)
|
||||
|
||||
@property
|
||||
def hotels_path(self) -> AnyStr:
    """Return the path of ``hotels.csv`` under the intermediate data directory."""
    file_name = "hotels.csv"
    return os.path.join(self.intermediate_data_path, file_name)
|
||||
|
||||
@property
|
||||
def promo_catalog_cities_path(self) -> AnyStr:
    """Return the path of ``promo_catalog_cities.json`` under the intermediate data directory."""
    file_name = "promo_catalog_cities.json"
    return os.path.join(self.intermediate_data_path, file_name)
|
||||
|
||||
@@ -45,7 +45,7 @@ class GenTool:
|
||||
"stats_types": bool,
|
||||
"version": bool,
|
||||
"threads_count": int,
|
||||
"booking_data": str,
|
||||
"hotels_path": str,
|
||||
"promo_catalog_cities": str,
|
||||
"brands_data": str,
|
||||
"brands_translations_data": str,
|
||||
|
||||
@@ -104,24 +104,25 @@ class StagePreprocess(Stage):
|
||||
|
||||
@outer_stage
|
||||
@depends_from_internal(
|
||||
D(settings.HOTELS_URL, PathProvider.hotels_path, "p"),
|
||||
D(settings.PROMO_CATALOG_CITIES_URL, PathProvider.promo_catalog_cities_path, "p"),
|
||||
D(settings.POPULARITY_URL, PathProvider.popularity_path, "p"),
|
||||
D(settings.FOOD_URL, PathProvider.food_paths, "p"),
|
||||
D(settings.FOOD_TRANSLATIONS_URL, PathProvider.food_translations_path, "p"),
|
||||
)
|
||||
@test_stage(
|
||||
Test(st.make_test_booking_data(max_days=7), lambda e, _: e.production, True)
|
||||
)
|
||||
# @test_stage(
|
||||
# Test(st.make_test_booking_data(max_days=7), lambda e, _: e.production, True)
|
||||
# )
|
||||
class StageFeatures(Stage):
|
||||
def apply(self, env: Env):
|
||||
extra = {}
|
||||
if is_accepted(env, StageDescriptions):
|
||||
extra.update({"idToWikidata": env.paths.id_to_wikidata_path})
|
||||
|
||||
extra.update({"hotels_path": settings.HOTELS_URL})
|
||||
|
||||
if env.production:
|
||||
extra.update(
|
||||
{
|
||||
"booking_data": env.paths.hotels_path,
|
||||
"promo_catalog_cities": env.paths.promo_catalog_cities_path,
|
||||
"popular_places_data": env.paths.popularity_path,
|
||||
"brands_data": env.paths.food_paths,
|
||||
|
||||
@@ -69,6 +69,7 @@ NEED_BUILD_WORLD_ROADS: false
|
||||
|
||||
# Urls for production maps generation.
|
||||
# UGC_URL:
|
||||
# Local path (not url!) where hotels.csv and placefeed.csv files are located.
|
||||
# HOTELS_URL:
|
||||
# PROMO_CATALOG_CITIES:
|
||||
# POPULARITY_URL:
|
||||
|
||||
Reference in New Issue
Block a user