mirror of
https://codeberg.org/comaps/comaps
synced 2025-12-19 13:03:36 +00:00
Quotes support for CSV parser
Signed-off-by: Alexander Borsuk <me@alex.bio>
This commit is contained in:
committed by
Konstantin Pastbin
parent
128b0f3e2b
commit
fb1279ca5a
@@ -483,15 +483,66 @@ bool AlmostEqual(std::string const & str1, std::string const & str2, size_t mism
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ParseCSVRow(std::string const & s, char const delimiter, std::vector<std::string> & target)
|
namespace
|
||||||
|
{
|
||||||
|
// Trim, unquote the string, and unescape two double quotes.
|
||||||
|
std::string & UnescapeCSVColumn(std::string & s)
|
||||||
|
{
|
||||||
|
Trim(s);
|
||||||
|
|
||||||
|
if (s.size() < 2)
|
||||||
|
return s;
|
||||||
|
|
||||||
|
if (*s.begin() == '"' && *s.rbegin() == '"')
|
||||||
|
s = s.substr(1, s.size() - 2);
|
||||||
|
|
||||||
|
for (size_t i = 1; i < s.size(); ++i)
|
||||||
|
if (s[i] == '"' && s[i - 1] == '"')
|
||||||
|
s.erase(i, 1);
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
void ParseCSVRow(std::string const & row, char const delimiter, std::vector<std::string> & target)
|
||||||
{
|
{
|
||||||
target.clear();
|
target.clear();
|
||||||
TokenizeIterator<SimpleDelimiter, std::string::const_iterator, true /* KeepEmptyTokens */> it(s.begin(), s.end(), delimiter);
|
|
||||||
for (; it; ++it)
|
std::string prevColumns;
|
||||||
|
for (TokenizeIterator<SimpleDelimiter, std::string::const_iterator, true /* KeepEmptyTokens */> it {row.begin(), row.end(), delimiter}; it; ++it)
|
||||||
{
|
{
|
||||||
std::string column(*it);
|
std::string_view column = *it;
|
||||||
Trim(column);
|
size_t const quotesCount = std::count(column.begin(), column.end(), '"');
|
||||||
target.push_back(std::move(column));
|
bool const evenQuotes = quotesCount % 2 == 0;
|
||||||
|
if (prevColumns.empty())
|
||||||
|
{
|
||||||
|
if (evenQuotes)
|
||||||
|
{
|
||||||
|
if (quotesCount == 0)
|
||||||
|
target.emplace_back(column);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::string strColumn {column};
|
||||||
|
target.push_back(UnescapeCSVColumn(strColumn));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
prevColumns = column;
|
||||||
|
prevColumns.push_back(',');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
prevColumns.append(column);
|
||||||
|
if (evenQuotes)
|
||||||
|
prevColumns.push_back(',');
|
||||||
|
else
|
||||||
|
{
|
||||||
|
target.push_back(UnescapeCSVColumn(prevColumns));
|
||||||
|
prevColumns.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Special case: if the string is empty, return an empty array instead of {""}.
|
// Special case: if the string is empty, return an empty array instead of {""}.
|
||||||
|
|||||||
@@ -8,6 +8,8 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
namespace csv_reader_test
|
||||||
|
{
|
||||||
using platform::tests_support::ScopedFile;
|
using platform::tests_support::ScopedFile;
|
||||||
|
|
||||||
using Row = coding::CSVReader::Row;
|
using Row = coding::CSVReader::Row;
|
||||||
@@ -179,3 +181,41 @@ UNIT_TEST(CSVReaderIterator)
|
|||||||
TEST_EQUAL(index, answer.size(), ());
|
TEST_EQUAL(index, answer.size(), ());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies that empty columns (leading, consecutive, and trailing) are reported as empty strings,
// and that reading past the last row keeps returning an empty optional.
UNIT_TEST(CSVReaderEmptyColumns)
{
  // Two rows with six columns each.
  char const * const csvContent = ",,2,,4,\n,,,,,";
  char const * const csvFileName = "test.csv";
  ScopedFile sf(csvFileName, csvContent);

  Rows const answer = {
      {"", "", "2", "", "4", ""},
      {"", "", "", "", "", ""},
  };

  coding::CSVReader reader(sf.GetFullPath());

  size_t index = 0;
  for (auto optionalRow = reader.ReadRow(); optionalRow; optionalRow = reader.ReadRow())
  {
    TEST_EQUAL(*optionalRow, answer[index], ());
    ++index;
  }

  // Every expected row was seen, and the exhausted reader stays exhausted on repeated calls.
  TEST_EQUAL(index, answer.size(), ());
  TEST(!reader.ReadRow(), ());
  TEST(!reader.ReadRow(), ());
}
|
||||||
|
|
||||||
|
// Verifies quoted column handling: surrounding quotes are stripped, a delimiter inside quotes
// does not split the column, and a doubled quote ("") is unescaped into a single quote.
UNIT_TEST(CSVReaderQuotes)
{
  // A single row with eight columns; the last one (after the trailing comma) is empty.
  char const * const csvContent =
      R"(noquotes, "" , "with space","with, comma","""double"" quotes","""double,"", commas", """""",)";
  char const * const csvFileName = "test.csv";
  ScopedFile sf(csvFileName, csvContent);

  Rows const answer = {{
      "noquotes",
      "",
      "with space",
      "with, comma",
      "\"double\" quotes",
      "\"double,\", commas",
      "\"\"",
      "",
  }};

  coding::CSVReader reader(sf.GetFullPath());

  size_t index = 0;
  for (auto optionalRow = reader.ReadRow(); optionalRow; optionalRow = reader.ReadRow())
  {
    TEST_EQUAL(*optionalRow, answer[index], ());
    ++index;
  }

  // Every expected row was seen, and the exhausted reader stays exhausted on repeated calls.
  TEST_EQUAL(index, answer.size(), ());
  TEST(!reader.ReadRow(), ());
  TEST(!reader.ReadRow(), ());
}
|
||||||
|
|
||||||
|
} // namespace csv_reader_test
|
||||||
|
|||||||
Reference in New Issue
Block a user