You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@phoenix.apache.org by st...@apache.org on 2021/01/06 17:53:04 UTC

[phoenix] branch master updated: PHOENIX-3499 Enable null value for quote character for CSVBulkLoad tool

This is an automated email from the ASF dual-hosted git repository.

stoty pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/phoenix.git


The following commit(s) were added to refs/heads/master by this push:
     new 5ef5faa  PHOENIX-3499 Enable null value for quote character for CSVBulkLoad tool
5ef5faa is described below

commit 5ef5faa63b8d7f68c327239a1c66de11876dc80c
Author: Sergey Soldatov <ss...@apache.org>
AuthorDate: Fri Nov 18 03:24:00 2016 -0800

    PHOENIX-3499 Enable null value for quote character for CSVBulkLoad tool
---
 .../apache/phoenix/end2end/CsvBulkLoadToolIT.java  | 37 +++++++++++++++++++++-
 .../phoenix/mapreduce/CsvBulkImportUtil.java       | 11 ++++---
 .../apache/phoenix/mapreduce/CsvBulkLoadTool.java  | 26 +++++++++------
 .../phoenix/mapreduce/CsvToKeyValueMapper.java     |  2 +-
 4 files changed, 61 insertions(+), 15 deletions(-)

diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/CsvBulkLoadToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/CsvBulkLoadToolIT.java
index e1c8714..81c2255 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/CsvBulkLoadToolIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/CsvBulkLoadToolIT.java
@@ -139,7 +139,6 @@ public class CsvBulkLoadToolIT extends BaseOwnClusterIT {
         stmt.close();
     }
 
-
     @Test
     public void testImportWithTabs() throws Exception {
 
@@ -173,6 +172,42 @@ public class CsvBulkLoadToolIT extends BaseOwnClusterIT {
     }
 
     @Test
+    public void testImportWithTabsAndEmptyQuotes() throws Exception {
+
+        Statement stmt = conn.createStatement();
+        stmt.execute("CREATE TABLE TABLE8 (ID INTEGER NOT NULL PRIMARY KEY, " +
+                "NAME1 VARCHAR, NAME2 VARCHAR)");
+
+        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
+        FSDataOutputStream outputStream = fs.create(new Path("/tmp/input8.csv"));
+        PrintWriter printWriter = new PrintWriter(outputStream);
+        printWriter.println("1\t\"\\t123\tName 2a");
+        printWriter.println("2\tName 2a\tName 2b");
+        printWriter.close();
+
+        CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
+        csvBulkLoadTool.setConf(getUtility().getConfiguration());
+        int exitCode = csvBulkLoadTool.run(new String[] {
+                "--input", "/tmp/input8.csv",
+                "--table", "table8",
+                "--zookeeper", zkQuorum,
+                "-q", "\"\"",
+                "-e", "\"\"",
+                "--delimiter", "\\t"
+                });
+        assertEquals(0, exitCode);
+
+        ResultSet rs = stmt.executeQuery("SELECT id, name1, name2 FROM table8 ORDER BY id");
+        assertTrue(rs.next());
+        assertEquals(1, rs.getInt(1));
+        assertEquals("\"\\t123", rs.getString(2));
+        assertEquals("Name 2a", rs.getString(3));
+
+        rs.close();
+        stmt.close();
+    }
+
+    @Test
     public void testFullOptionImport() throws Exception {
 
         Statement stmt = conn.createStatement();
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkImportUtil.java b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkImportUtil.java
index b992f82..21787b2 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkImportUtil.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkImportUtil.java
@@ -43,8 +43,8 @@ public class CsvBulkImportUtil {
      * @param arrayDelimiter array delimiter character, can be null
      * @param binaryEncoding 
      */
-    public static void initCsvImportJob(Configuration conf, char fieldDelimiter, char quoteChar,
-            char escapeChar, String arrayDelimiter, String binaryEncoding) {
+    public static void initCsvImportJob(Configuration conf, char fieldDelimiter, Character quoteChar,
+            Character escapeChar, String arrayDelimiter, String binaryEncoding) {
         setChar(conf, CsvToKeyValueMapper.FIELD_DELIMITER_CONFKEY, fieldDelimiter);
         setChar(conf, CsvToKeyValueMapper.QUOTE_CHAR_CONFKEY, quoteChar);
         setChar(conf, CsvToKeyValueMapper.ESCAPE_CHAR_CONFKEY, escapeChar);
@@ -69,8 +69,11 @@ public class CsvBulkImportUtil {
     }
 
     @VisibleForTesting
-    static void setChar(Configuration conf, String confKey, char charValue) {
-        conf.set(confKey, Bytes.toString(Base64.getEncoder().encode(Character.toString(charValue).getBytes())));
+
+    static void setChar(Configuration conf, String confKey, Character charValue) {
+        if(charValue!=null) {
+            conf.set(confKey, Bytes.toString(Base64.getEncoder().encode(charValue.toString().getBytes())));
+        }
     }
 
     @VisibleForTesting
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkLoadTool.java b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkLoadTool.java
index 8daf5d3..7e0a6fd 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkLoadTool.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkLoadTool.java
@@ -55,31 +55,39 @@ public class CsvBulkLoadTool extends AbstractBulkLoadTool {
         // we don't parse ZK_QUORUM_OPT here because we need it in order to
         // create the connection we need to build importColumns.
 
-        char delimiterChar = ',';
+        Character delimiterChar = ',';
         if (cmdLine.hasOption(DELIMITER_OPT.getOpt())) {
-            String delimString = StringEscapeUtils.unescapeJava(cmdLine.getOptionValue(DELIMITER_OPT.getOpt()));
+            String delimString = StringEscapeUtils.unescapeJava(cmdLine.getOptionValue
+                    (DELIMITER_OPT.getOpt()));
             if (delimString.length() != 1) {
                 throw new IllegalArgumentException("Illegal delimiter character: " + delimString);
             }
             delimiterChar = delimString.charAt(0);
         }
 
-        char quoteChar = '"';
+        Character quoteChar = '"';
         if (cmdLine.hasOption(QUOTE_OPT.getOpt())) {
-            String quoteString = cmdLine.getOptionValue(QUOTE_OPT.getOpt());
-            if (quoteString.length() != 1) {
+            String quoteString = StringEscapeUtils.unescapeJava(cmdLine.getOptionValue(QUOTE_OPT
+                    .getOpt()));
+            if(quoteString.length() == 0) {
+                quoteChar = null;
+            } else if (quoteString.length() != 1) {
                 throw new IllegalArgumentException("Illegal quote character: " + quoteString);
+            } else {
+                quoteChar = quoteString.charAt(0);
             }
-            quoteChar = quoteString.charAt(0);
         }
 
-        char escapeChar = '\\';
+        Character escapeChar = '\\';
         if (cmdLine.hasOption(ESCAPE_OPT.getOpt())) {
             String escapeString = cmdLine.getOptionValue(ESCAPE_OPT.getOpt());
-            if (escapeString.length() != 1) {
+            if(escapeString.length() == 0) {
+                escapeChar = null;
+            } else if (escapeString.length() != 1) {
                 throw new IllegalArgumentException("Illegal escape character: " + escapeString);
+            } else {
+                escapeChar = escapeString.charAt(0);
             }
-            escapeChar = escapeString.charAt(0);
         }
         
         String binaryEncoding = null;
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvToKeyValueMapper.java b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvToKeyValueMapper.java
index cd86bdc..8c28c3c 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvToKeyValueMapper.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvToKeyValueMapper.java
@@ -92,7 +92,7 @@ public class CsvToKeyValueMapper extends FormatToBytesWritableMapper<CSVRecord>
     static class CsvLineParser implements LineParser<CSVRecord> {
         private final CSVFormat csvFormat;
 
-        CsvLineParser(char fieldDelimiter, char quote, char escape) {
+        CsvLineParser(Character fieldDelimiter, Character quote, Character escape) {
             this.csvFormat = CSVFormat.DEFAULT
                     .withIgnoreEmptyLines(true)
                     .withDelimiter(fieldDelimiter)