Fixed getting url bug.

Signed-off-by: Viktor Govako <viktor.govako@gmail.com>
This commit is contained in:
Viktor Govako
2022-09-07 00:44:48 +03:00
committed by zyphlar
parent 623c4ed9fe
commit 6d6140ee33
3 changed files with 31 additions and 34 deletions

View File

@@ -2,7 +2,6 @@
#include "generator/utils.hpp"
#include "indexer/feature.hpp"
#include "indexer/ftypes_matcher.hpp"
#include "indexer/feature_processor.hpp"
#include "platform/platform.hpp"
@@ -97,36 +96,29 @@ void DescriptionsCollector::operator() (FeatureType & ft, uint32_t featureId)
void DescriptionsCollector::operator() (std::string const & wikiUrl, uint32_t featureId)
{
std::string path;
descriptions::LangMeta langsMeta;
size_t size = 0;
// First try to get wikipedia url.
bool const isWikiUrl = !wikiUrl.empty();
if (isWikiUrl)
if (!wikiUrl.empty())
size = FindPageAndFill(MakePathForWikipedia(m_wikipediaDir, wikiUrl), langsMeta);
// Second try to get wikidata id.
bool const isWikiUrl = !langsMeta.empty();
if (!isWikiUrl)
{
path = MakePathForWikipedia(m_wikipediaDir, wikiUrl);
}
else
{
// Second try to get wikidata id.
auto const wikidataId = m_wikidataHelper.GetWikidataId(featureId);
if (wikidataId)
path = MakePathForWikidata(m_wikipediaDir, *wikidataId);
size = FindPageAndFill(MakePathForWikidata(m_wikipediaDir, *wikidataId), langsMeta);
}
if (path.empty())
if (langsMeta.empty())
return;
descriptions::LangMeta langsMeta;
int const sz = FindPageAndFill(path, langsMeta);
if (sz < 0)
{
LOG(LWARNING, ("Page", path, "not found."));
return;
}
else if (sz > 0)
if (size > 0)
{
// Count only newly loaded pages (not ones already in the cache).
m_stat.AddSize(sz);
m_stat.AddSize(size);
m_stat.IncPage();
}
@@ -166,10 +158,10 @@ std::string DescriptionsCollector::FillStringFromFile(std::string const & fullPa
return std::string(std::istreambuf_iterator<char>(stream), std::istreambuf_iterator<char>());
}
int DescriptionsCollector::FindPageAndFill(std::string const & path, descriptions::LangMeta & meta)
size_t DescriptionsCollector::FindPageAndFill(std::string const & path, descriptions::LangMeta & meta)
{
int size = -1;
if (!IsValidDir(path))
size_t size = 0;
if (path.empty() || !IsValidDir(path))
return size;
Platform::FilesList filelist;
@@ -184,24 +176,27 @@ int DescriptionsCollector::FindPageAndFill(std::string const & path, description
continue;
}
if (size < 0)
size = 0;
m_stat.IncCode(code);
auto res = m_path2Index.try_emplace(base::JoinPath(path, filename), 0);
if (res.second)
{
auto const & filePath = res.first->first;
auto content = FillStringFromFile(filePath);
size_t const sz = content.size();
if (sz == 0)
{
LOG(LWARNING, ("Empty descriptions file:", filePath));
m_path2Index.erase(res.first);
continue;
}
auto & strings = m_collection.m_strings;
res.first->second = strings.size();
strings.push_back(FillStringFromFile(filePath));
strings.push_back(std::move(content));
size_t const sz = strings.back().size();
CHECK(sz > 0, ("Empty file:", filePath));
size += sz;
}
m_stat.IncCode(code);
meta.emplace_back(code, res.first->second);
}

View File

@@ -78,8 +78,8 @@ public:
static std::string FillStringFromFile(std::string const & fullPath);
/// @return -1 If page not found. 0 if page from cache. Size > 0 if page was loaded from disk.
int FindPageAndFill(std::string const & wikipediaUrl, descriptions::LangMeta & meta);
/// @return Total size of the pages newly loaded from disk; 0 if nothing was found or every page was already cached.
size_t FindPageAndFill(std::string const & wikipediaUrl, descriptions::LangMeta & meta);
public:
DescriptionsCollectionBuilderStat m_stat;

View File

@@ -113,8 +113,10 @@ public:
StringUtf8Multilang str;
std::string const badUrl = "https://en.wikipedia.org/wiki/Not_exists";
auto const path = DescriptionsCollector::MakePathForWikipedia(m_wikiDir, badUrl);
descriptions::LangMeta meta;
TEST_EQUAL(collector.FindPageAndFill(path, meta), -1, ());
TEST_EQUAL(collector.FindPageAndFill(path, meta), 0, ());
TEST(meta.empty(), ());
}
}