Fixed a bug in getting the URL.

Signed-off-by: Viktor Govako <viktor.govako@gmail.com>
This commit is contained in:
Viktor Govako
2022-09-07 00:44:48 +03:00
committed by Konstantin Pastbin
parent 2f2c3b042f
commit c2c68c4a76
3 changed files with 31 additions and 34 deletions

View File

@@ -2,7 +2,6 @@
#include "generator/utils.hpp" #include "generator/utils.hpp"
#include "indexer/feature.hpp" #include "indexer/feature.hpp"
#include "indexer/ftypes_matcher.hpp"
#include "indexer/feature_processor.hpp" #include "indexer/feature_processor.hpp"
#include "platform/platform.hpp" #include "platform/platform.hpp"
@@ -97,36 +96,29 @@ void DescriptionsCollector::operator() (FeatureType & ft, uint32_t featureId)
void DescriptionsCollector::operator() (std::string const & wikiUrl, uint32_t featureId) void DescriptionsCollector::operator() (std::string const & wikiUrl, uint32_t featureId)
{ {
std::string path; descriptions::LangMeta langsMeta;
size_t size = 0;
// First try to get wikipedia url. // First try to get wikipedia url.
bool const isWikiUrl = !wikiUrl.empty(); if (!wikiUrl.empty())
if (isWikiUrl) size = FindPageAndFill(MakePathForWikipedia(m_wikipediaDir, wikiUrl), langsMeta);
// Second try to get wikidata id.
bool const isWikiUrl = !langsMeta.empty();
if (!isWikiUrl)
{ {
path = MakePathForWikipedia(m_wikipediaDir, wikiUrl);
}
else
{
// Second try to get wikidata id.
auto const wikidataId = m_wikidataHelper.GetWikidataId(featureId); auto const wikidataId = m_wikidataHelper.GetWikidataId(featureId);
if (wikidataId) if (wikidataId)
path = MakePathForWikidata(m_wikipediaDir, *wikidataId); size = FindPageAndFill(MakePathForWikidata(m_wikipediaDir, *wikidataId), langsMeta);
} }
if (path.empty()) if (langsMeta.empty())
return; return;
descriptions::LangMeta langsMeta; if (size > 0)
int const sz = FindPageAndFill(path, langsMeta);
if (sz < 0)
{
LOG(LWARNING, ("Page", path, "not found."));
return;
}
else if (sz > 0)
{ {
// Add only new loaded pages (not from cache). // Add only new loaded pages (not from cache).
m_stat.AddSize(sz); m_stat.AddSize(size);
m_stat.IncPage(); m_stat.IncPage();
} }
@@ -166,10 +158,10 @@ std::string DescriptionsCollector::FillStringFromFile(std::string const & fullPa
return std::string(std::istreambuf_iterator<char>(stream), std::istreambuf_iterator<char>()); return std::string(std::istreambuf_iterator<char>(stream), std::istreambuf_iterator<char>());
} }
int DescriptionsCollector::FindPageAndFill(std::string const & path, descriptions::LangMeta & meta) size_t DescriptionsCollector::FindPageAndFill(std::string const & path, descriptions::LangMeta & meta)
{ {
int size = -1; size_t size = 0;
if (!IsValidDir(path)) if (path.empty() || !IsValidDir(path))
return size; return size;
Platform::FilesList filelist; Platform::FilesList filelist;
@@ -184,24 +176,27 @@ int DescriptionsCollector::FindPageAndFill(std::string const & path, description
continue; continue;
} }
if (size < 0)
size = 0;
m_stat.IncCode(code);
auto res = m_path2Index.try_emplace(base::JoinPath(path, filename), 0); auto res = m_path2Index.try_emplace(base::JoinPath(path, filename), 0);
if (res.second) if (res.second)
{ {
auto const & filePath = res.first->first; auto const & filePath = res.first->first;
auto content = FillStringFromFile(filePath);
size_t const sz = content.size();
if (sz == 0)
{
LOG(LWARNING, ("Empty descriptions file:", filePath));
m_path2Index.erase(res.first);
continue;
}
auto & strings = m_collection.m_strings; auto & strings = m_collection.m_strings;
res.first->second = strings.size(); res.first->second = strings.size();
strings.push_back(FillStringFromFile(filePath)); strings.push_back(std::move(content));
size_t const sz = strings.back().size();
CHECK(sz > 0, ("Empty file:", filePath));
size += sz; size += sz;
} }
m_stat.IncCode(code);
meta.emplace_back(code, res.first->second); meta.emplace_back(code, res.first->second);
} }

View File

@@ -78,8 +78,8 @@ public:
static std::string FillStringFromFile(std::string const & fullPath); static std::string FillStringFromFile(std::string const & fullPath);
/// @return -1 If page not found. 0 if page from cache. Size > 0 if page was loaded from disk. /// @return Total size of the page files newly loaded from disk (0 if none were loaded).
int FindPageAndFill(std::string const & wikipediaUrl, descriptions::LangMeta & meta); size_t FindPageAndFill(std::string const & wikipediaUrl, descriptions::LangMeta & meta);
public: public:
DescriptionsCollectionBuilderStat m_stat; DescriptionsCollectionBuilderStat m_stat;

View File

@@ -113,8 +113,10 @@ public:
StringUtf8Multilang str; StringUtf8Multilang str;
std::string const badUrl = "https://en.wikipedia.org/wiki/Not_exists"; std::string const badUrl = "https://en.wikipedia.org/wiki/Not_exists";
auto const path = DescriptionsCollector::MakePathForWikipedia(m_wikiDir, badUrl); auto const path = DescriptionsCollector::MakePathForWikipedia(m_wikiDir, badUrl);
descriptions::LangMeta meta; descriptions::LangMeta meta;
TEST_EQUAL(collector.FindPageAndFill(path, meta), -1, ()); TEST_EQUAL(collector.FindPageAndFill(path, meta), 0, ());
TEST(meta.empty(), ());
} }
} }