diff --git a/base/string_utils.cpp b/base/string_utils.cpp
index 0dcb6d15b..670b125b3 100644
--- a/base/string_utils.cpp
+++ b/base/string_utils.cpp
@@ -483,15 +483,95 @@ bool AlmostEqual(std::string const & str1, std::string const & str2, size_t mism
   return false;
 }
 
-void ParseCSVRow(std::string const & s, char const delimiter, std::vector & target)
+namespace
+{
+// Normalizes one raw CSV column in place and returns a reference to it:
+// 1. trims the string with Trim();
+// 2. removes one pair of enclosing double quotes if the trimmed string starts and ends with '"';
+// 3. replaces every pair of adjacent double quotes ("") with a single one (").
+// Step 3 runs even when no enclosing quotes were removed.
+std::string & UnescapeCSVColumn(std::string & s)
+{
+  Trim(s);
+
+  // Fewer than two characters: nothing to unquote and no room for an escaped pair.
+  if (s.size() < 2)
+    return s;
+
+  if (s.front() == '"' && s.back() == '"')
+    s = s.substr(1, s.size() - 2);
+
+  // After erasing s[i], ++i moves past the character that slid into position i, so that
+  // character is kept and only every second quote of a run is removed.
+  for (size_t i = 1; i < s.size(); ++i)
+    if (s[i] == '"' && s[i - 1] == '"')
+      s.erase(i, 1);
+
+  return s;
+}
+} // namespace
+
+// Splits |row| by |delimiter| into |target|; the previous contents of |target| are discarded.
+// A column enclosed in double quotes may contain |delimiter|; such a column is reassembled from
+// its tokens and passed through UnescapeCSVColumn(). Columns without any double quote are stored
+// exactly as tokenized.
+void ParseCSVRow(std::string const & row, char const delimiter, std::vector & target)
 {
   target.clear();
-  TokenizeIterator it(s.begin(), s.end(), delimiter);
-  for (; it; ++it)
+
+  // Holds the tokens of a quoted column that contains |delimiter|, re-joined with |delimiter|.
+  // It is non-empty only while the loop is inside such a column.
+  std::string prevColumns;
+  for (TokenizeIterator it {row.begin(), row.end(), delimiter}; it; ++it)
   {
-    std::string column(*it);
-    Trim(column);
-    target.push_back(std::move(column));
+    std::string_view column = *it;
+    size_t const quotesCount = std::count(column.begin(), column.end(), '"');
+    // An odd number of quotes means that this token opens or closes a quoted column.
+    bool const evenQuotes = quotesCount % 2 == 0;
+    if (prevColumns.empty())
+    {
+      if (evenQuotes)
+      {
+        // NOTE(review): a quote-free column is stored as tokenized, without Trim() - confirm.
+        if (quotesCount == 0)
+          target.emplace_back(column);
+        else
+        {
+          std::string strColumn {column};
+          UnescapeCSVColumn(strColumn);
+          target.push_back(std::move(strColumn));
+        }
+      }
+      else
+      {
+        // The token opens a quoted column; put back the delimiter consumed by the tokenizer.
+        prevColumns = column;
+        prevColumns.push_back(delimiter);
+      }
+    }
+    else
+    {
+      prevColumns.append(column);
+      if (evenQuotes)
+        prevColumns.push_back(delimiter);
+      else
+      {
+        // The token closes the quoted column.
+        UnescapeCSVColumn(prevColumns);
+        target.push_back(std::move(prevColumns));
+        // Reset the moved-from buffer so that the next token starts a fresh column.
+        prevColumns.clear();
+      }
+    }
   }
+
+  // An unterminated quote leaves text in |prevColumns|. Keep it as the last column (minus the
+  // delimiter appended above) instead of silently dropping the rest of the row.
+  if (!prevColumns.empty())
+  {
+    prevColumns.pop_back();
+    UnescapeCSVColumn(prevColumns);
+    target.push_back(std::move(prevColumns));
+  }
 
   // Special case: if the string is empty, return an empty array instead of {""}.
diff --git a/coding/coding_tests/csv_reader_test.cpp b/coding/coding_tests/csv_reader_test.cpp
index 1bf970761..dd2ad6b8b 100644
--- a/coding/coding_tests/csv_reader_test.cpp
+++ b/coding/coding_tests/csv_reader_test.cpp
@@ -8,6 +8,8 @@
 #include
 #include
 
+namespace csv_reader_test
+{
 using platform::tests_support::ScopedFile;
 
 using Row = coding::CSVReader::Row;
@@ -179,3 +181,57 @@ UNIT_TEST(CSVReaderIterator)
     TEST_EQUAL(index, answer.size(), ());
   }
 }
+
+// Checks that empty columns are preserved, including leading and trailing ones and a row that
+// consists of delimiters only.
+UNIT_TEST(CSVReaderEmptyColumns)
+{
+  auto const kContentWithEmptyColumns = ",,2,,4,\n,,,,,";
+  auto const fileName = "test.csv";
+  ScopedFile sf(fileName, kContentWithEmptyColumns);
+  Rows const answer = {{"", "", "2", "", "4", ""}, {"", "", "", "", "", ""}};
+  coding::CSVReader reader(sf.GetFullPath());
+  size_t index = 0;
+  while (auto const optionalRow = reader.ReadRow())
+  {
+    // Compare only rows that have an expected counterpart; a surplus row must not index past the
+    // end of |answer| and is reported by the row count check after the loop.
+    if (index < answer.size())
+    {
+      TEST_EQUAL(*optionalRow, answer[index], ());
+    }
+    ++index;
+  }
+  TEST_EQUAL(index, answer.size(), ());
+  // Reading past the last row must keep returning a value that converts to false.
+  TEST(!reader.ReadRow(), ());
+  TEST(!reader.ReadRow(), ());
+}
+
+// Checks quoted columns: spaces around the quotes, a delimiter inside the quotes, and doubled
+// quotes that stand for a literal quote.
+UNIT_TEST(CSVReaderQuotes)
+{
+  auto const kContentWithQuotes = R"(noquotes, "" , "with space","with, comma","""double"" quotes","""double,"", commas", """""",)";
+  auto const fileName = "test.csv";
+  ScopedFile sf(fileName, kContentWithQuotes);
+  Rows const answer = {{"noquotes", "", "with space", "with, comma", "\"double\" quotes", "\"double,\", commas","\"\"", ""}};
+  coding::CSVReader reader(sf.GetFullPath());
+  size_t index = 0;
+  while (auto const optionalRow = reader.ReadRow())
+  {
+    // Compare only rows that have an expected counterpart; a surplus row must not index past the
+    // end of |answer| and is reported by the row count check after the loop.
+    if (index < answer.size())
+    {
+      TEST_EQUAL(*optionalRow, answer[index], ());
+    }
+    ++index;
+  }
+  TEST_EQUAL(index, answer.size(), ());
+  // Reading past the last row must keep returning a value that converts to false.
+  TEST(!reader.ReadRow(), ());
+  TEST(!reader.ReadRow(), ());
+}
+
+} // namespace csv_reader_test