You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by ch...@apache.org on 2021/06/05 01:04:05 UTC
[iotdb] 01/01: fix import csv split by comma bug (#3253)
This is an automated email from the ASF dual-hosted git repository.
chaow pushed a commit to branch cherry_pick_#3253
in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 600e7ca631487d8640c928e621ae07df28dc10ea
Author: Haimei Guo <68...@users.noreply.github.com>
AuthorDate: Mon May 31 15:28:40 2021 +0800
fix import csv split by comma bug (#3253)
---
.../main/java/org/apache/iotdb/tool/ImportCsv.java | 41 +++++++---------------
.../org/apache/iotdb/tool/CsvLineSplitTest.java | 11 ++++--
2 files changed, 21 insertions(+), 31 deletions(-)
diff --git a/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java b/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java
index 6959360..a3179d3 100644
--- a/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java
+++ b/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java
@@ -424,38 +424,21 @@ public class ImportCsv extends AbstractCsvTool {
public static String[] splitCsvLine(String path) {
List<String> nodes = new ArrayList<>();
- startIndex = 0;
- for (i = 0; i < path.length(); i++) {
- if (path.charAt(i) == ',') {
- nodes.add(path.substring(startIndex, i));
- startIndex = i + 1;
- } else if (path.charAt(i) == '"') {
- nextNode(path, nodes, '"');
+ int start = 0;
+ boolean singleQuotes = false;
+ boolean doubleQuotes = false;
+ // split by comma if the comma is followed by an even number of quotes
+ for (int i = 0; i < path.length(); i++) {
+ if (path.charAt(i) == '\"') {
+ doubleQuotes = !doubleQuotes; // toggle state
} else if (path.charAt(i) == '\'') {
- nextNode(path, nodes, '\'');
+ singleQuotes = !singleQuotes;
+ } else if (path.charAt(i) == ',' && (!singleQuotes && !doubleQuotes)) {
+ nodes.add(path.substring(start, i));
+ start = i + 1;
}
}
- if (path.charAt(path.length() - 1) == ',') {
- nodes.add("");
- }
- if (startIndex <= path.length() - 1) {
- nodes.add(path.substring(startIndex));
- }
+ nodes.add(path.substring(start));
return nodes.toArray(new String[0]);
}
-
- public static void nextNode(String path, List<String> nodes, char enclose) {
- int endIndex = path.indexOf(enclose, i + 1);
- // if a double quotes with escape character
- while (endIndex != -1 && path.charAt(endIndex - 1) == '\\') {
- endIndex = path.indexOf(enclose, endIndex + 1);
- }
- if (endIndex != -1 && (endIndex == path.length() - 1 || path.charAt(endIndex + 1) == ',')) {
- nodes.add(path.substring(startIndex + 1, endIndex));
- i = endIndex + 1;
- startIndex = endIndex + 2;
- } else {
- throw new IllegalArgumentException("Illegal csv line" + path);
- }
- }
}
diff --git a/cli/src/test/java/org/apache/iotdb/tool/CsvLineSplitTest.java b/cli/src/test/java/org/apache/iotdb/tool/CsvLineSplitTest.java
index fd1c9ba..2b54434 100644
--- a/cli/src/test/java/org/apache/iotdb/tool/CsvLineSplitTest.java
+++ b/cli/src/test/java/org/apache/iotdb/tool/CsvLineSplitTest.java
@@ -26,8 +26,15 @@ public class CsvLineSplitTest {
@Test
public void testSplit() {
Assert.assertArrayEquals(
- new String[] {"", "a", "b", "c", "\\\""}, ImportCsv.splitCsvLine(",a,b,c,\"\\\"\""));
+ new String[] {"", "a", "b", "c", "\\\""}, ImportCsv.splitCsvLine(",a,b,c,\\\""));
Assert.assertArrayEquals(
- new String[] {"", "a", "b", "\\'"}, ImportCsv.splitCsvLine(",a,b,\"\\'\""));
+ new String[] {"", "a", "b", "\\'"}, ImportCsv.splitCsvLine(",a,b,\\'"));
+ Assert.assertArrayEquals(
+ new String[] {"", "a\",\"a", "\"a,,\"", "'"}, ImportCsv.splitCsvLine(",a\",\"a,\"a,,\",'"));
+ Assert.assertArrayEquals(
+ new String[] {"True", "a=\",\"a''"}, ImportCsv.splitCsvLine("True,a=\",\"a''"));
+ Assert.assertArrayEquals(
+ new String[] {"True", "\"a=,,,a=z//z'a\""},
+ ImportCsv.splitCsvLine("True,\"a=,,,a=z//z'a\""));
}
}