You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iotdb.apache.org by ch...@apache.org on 2021/06/05 01:04:05 UTC

[iotdb] 01/01: fix import csv split by comma bug (#3253)

This is an automated email from the ASF dual-hosted git repository.

chaow pushed a commit to branch cherry_pick_#3253
in repository https://gitbox.apache.org/repos/asf/iotdb.git

commit 600e7ca631487d8640c928e621ae07df28dc10ea
Author: Haimei Guo <68...@users.noreply.github.com>
AuthorDate: Mon May 31 15:28:40 2021 +0800

    fix import csv split by comma bug (#3253)
---
 .../main/java/org/apache/iotdb/tool/ImportCsv.java | 41 +++++++---------------
 .../org/apache/iotdb/tool/CsvLineSplitTest.java    | 11 ++++--
 2 files changed, 21 insertions(+), 31 deletions(-)

diff --git a/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java b/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java
index 6959360..a3179d3 100644
--- a/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java
+++ b/cli/src/main/java/org/apache/iotdb/tool/ImportCsv.java
@@ -424,38 +424,21 @@ public class ImportCsv extends AbstractCsvTool {
 
   public static String[] splitCsvLine(String path) {
     List<String> nodes = new ArrayList<>();
-    startIndex = 0;
-    for (i = 0; i < path.length(); i++) {
-      if (path.charAt(i) == ',') {
-        nodes.add(path.substring(startIndex, i));
-        startIndex = i + 1;
-      } else if (path.charAt(i) == '"') {
-        nextNode(path, nodes, '"');
+    int start = 0;
+    boolean singleQuotes = false;
+    boolean doubleQuotes = false;
+    // split on a comma only when it lies outside both single- and double-quoted sections
+    for (int i = 0; i < path.length(); i++) {
+      if (path.charAt(i) == '\"') {
+        doubleQuotes = !doubleQuotes; // toggle state
       } else if (path.charAt(i) == '\'') {
-        nextNode(path, nodes, '\'');
+        singleQuotes = !singleQuotes;
+      } else if (path.charAt(i) == ',' && (!singleQuotes && !doubleQuotes)) {
+        nodes.add(path.substring(start, i));
+        start = i + 1;
       }
     }
-    if (path.charAt(path.length() - 1) == ',') {
-      nodes.add("");
-    }
-    if (startIndex <= path.length() - 1) {
-      nodes.add(path.substring(startIndex));
-    }
+    nodes.add(path.substring(start));
     return nodes.toArray(new String[0]);
   }
-
-  public static void nextNode(String path, List<String> nodes, char enclose) {
-    int endIndex = path.indexOf(enclose, i + 1);
-    // if a double quotes with escape character
-    while (endIndex != -1 && path.charAt(endIndex - 1) == '\\') {
-      endIndex = path.indexOf(enclose, endIndex + 1);
-    }
-    if (endIndex != -1 && (endIndex == path.length() - 1 || path.charAt(endIndex + 1) == ',')) {
-      nodes.add(path.substring(startIndex + 1, endIndex));
-      i = endIndex + 1;
-      startIndex = endIndex + 2;
-    } else {
-      throw new IllegalArgumentException("Illegal csv line" + path);
-    }
-  }
 }
diff --git a/cli/src/test/java/org/apache/iotdb/tool/CsvLineSplitTest.java b/cli/src/test/java/org/apache/iotdb/tool/CsvLineSplitTest.java
index fd1c9ba..2b54434 100644
--- a/cli/src/test/java/org/apache/iotdb/tool/CsvLineSplitTest.java
+++ b/cli/src/test/java/org/apache/iotdb/tool/CsvLineSplitTest.java
@@ -26,8 +26,15 @@ public class CsvLineSplitTest {
   @Test
   public void testSplit() {
     Assert.assertArrayEquals(
-        new String[] {"", "a", "b", "c", "\\\""}, ImportCsv.splitCsvLine(",a,b,c,\"\\\"\""));
+        new String[] {"", "a", "b", "c", "\\\""}, ImportCsv.splitCsvLine(",a,b,c,\\\""));
     Assert.assertArrayEquals(
-        new String[] {"", "a", "b", "\\'"}, ImportCsv.splitCsvLine(",a,b,\"\\'\""));
+        new String[] {"", "a", "b", "\\'"}, ImportCsv.splitCsvLine(",a,b,\\'"));
+    Assert.assertArrayEquals(
+        new String[] {"", "a\",\"a", "\"a,,\"", "'"}, ImportCsv.splitCsvLine(",a\",\"a,\"a,,\",'"));
+    Assert.assertArrayEquals(
+        new String[] {"True", "a=\",\"a''"}, ImportCsv.splitCsvLine("True,a=\",\"a''"));
+    Assert.assertArrayEquals(
+        new String[] {"True", "\"a=,,,a=z//z'a\""},
+        ImportCsv.splitCsvLine("True,\"a=,,,a=z//z'a\""));
   }
 }