Quotes support for CSV parser

Signed-off-by: Alexander Borsuk <me@alex.bio>
This commit is contained in:
Alexander Borsuk
2023-06-21 14:30:02 +02:00
committed by Konstantin Pastbin
parent 128b0f3e2b
commit fb1279ca5a
2 changed files with 97 additions and 6 deletions

View File

@@ -483,15 +483,66 @@ bool AlmostEqual(std::string const & str1, std::string const & str2, size_t mism
return false; return false;
} }
void ParseCSVRow(std::string const & s, char const delimiter, std::vector<std::string> & target) namespace
{
// Trim, unquote the string, and unescape two double quotes.
std::string & UnescapeCSVColumn(std::string & s)
{
Trim(s);
if (s.size() < 2)
return s;
if (*s.begin() == '"' && *s.rbegin() == '"')
s = s.substr(1, s.size() - 2);
for (size_t i = 1; i < s.size(); ++i)
if (s[i] == '"' && s[i - 1] == '"')
s.erase(i, 1);
return s;
}
} // namespace
void ParseCSVRow(std::string const & row, char const delimiter, std::vector<std::string> & target)
{ {
target.clear(); target.clear();
TokenizeIterator<SimpleDelimiter, std::string::const_iterator, true /* KeepEmptyTokens */> it(s.begin(), s.end(), delimiter);
for (; it; ++it) std::string prevColumns;
for (TokenizeIterator<SimpleDelimiter, std::string::const_iterator, true /* KeepEmptyTokens */> it {row.begin(), row.end(), delimiter}; it; ++it)
{ {
std::string column(*it); std::string_view column = *it;
Trim(column); size_t const quotesCount = std::count(column.begin(), column.end(), '"');
target.push_back(std::move(column)); bool const evenQuotes = quotesCount % 2 == 0;
if (prevColumns.empty())
{
if (evenQuotes)
{
if (quotesCount == 0)
target.emplace_back(column);
else
{
std::string strColumn {column};
target.push_back(UnescapeCSVColumn(strColumn));
}
}
else
{
prevColumns = column;
prevColumns.push_back(',');
}
}
else
{
prevColumns.append(column);
if (evenQuotes)
prevColumns.push_back(',');
else
{
target.push_back(UnescapeCSVColumn(prevColumns));
prevColumns.clear();
}
}
} }
// Special case: if the string is empty, return an empty array instead of {""}. // Special case: if the string is empty, return an empty array instead of {""}.

View File

@@ -8,6 +8,8 @@
#include <string> #include <string>
#include <vector> #include <vector>
namespace csv_reader_test
{
using platform::tests_support::ScopedFile; using platform::tests_support::ScopedFile;
using Row = coding::CSVReader::Row; using Row = coding::CSVReader::Row;
@@ -179,3 +181,41 @@ UNIT_TEST(CSVReaderIterator)
TEST_EQUAL(index, answer.size(), ()); TEST_EQUAL(index, answer.size(), ());
} }
} }
UNIT_TEST(CSVReaderEmptyColumns)
{
auto const kContentWithEmptyColumns = ",,2,,4,\n,,,,,";
auto const fileName = "test.csv";
ScopedFile sf(fileName, kContentWithEmptyColumns);
Rows const answer = {{"", "", "2", "", "4", ""}, {"", "", "", "", "", ""}};
coding::CSVReader reader(sf.GetFullPath());
size_t index = 0;
while (auto const optionalRow = reader.ReadRow())
{
TEST_EQUAL(*optionalRow, answer[index], ());
++index;
}
TEST_EQUAL(index, answer.size(), ());
TEST(!reader.ReadRow(), ());
TEST(!reader.ReadRow(), ());
}
UNIT_TEST(CSVReaderQuotes)
{
auto const kContentWithQuotes = R"(noquotes, "" , "with space","with, comma","""double"" quotes","""double,"", commas", """""",)";
auto const fileName = "test.csv";
ScopedFile sf(fileName, kContentWithQuotes);
Rows const answer = {{"noquotes", "", "with space", "with, comma", "\"double\" quotes", "\"double,\", commas","\"\"", ""}};
coding::CSVReader reader(sf.GetFullPath());
size_t index = 0;
while (auto const optionalRow = reader.ReadRow())
{
TEST_EQUAL(*optionalRow, answer[index], ());
++index;
}
TEST_EQUAL(index, answer.size(), ());
TEST(!reader.ReadRow(), ());
TEST(!reader.ReadRow(), ());
}
} // namespace csv_reader_test