You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@phoenix.apache.org by st...@apache.org on 2021/01/06 17:53:04 UTC
[phoenix] branch master updated: PHOENIX-3499 Enable null value for quote character for CSVBulkLoad tool
This is an automated email from the ASF dual-hosted git repository.
stoty pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/phoenix.git
The following commit(s) were added to refs/heads/master by this push:
new 5ef5faa PHOENIX-3499 Enable null value for quote character for CSVBulkLoad tool
5ef5faa is described below
commit 5ef5faa63b8d7f68c327239a1c66de11876dc80c
Author: Sergey Soldatov <ss...@apache.org>
AuthorDate: Fri Nov 18 03:24:00 2016 -0800
PHOENIX-3499 Enable null value for quote character for CSVBulkLoad tool
---
.../apache/phoenix/end2end/CsvBulkLoadToolIT.java | 37 +++++++++++++++++++++-
.../phoenix/mapreduce/CsvBulkImportUtil.java | 11 ++++---
.../apache/phoenix/mapreduce/CsvBulkLoadTool.java | 26 +++++++++------
.../phoenix/mapreduce/CsvToKeyValueMapper.java | 2 +-
4 files changed, 61 insertions(+), 15 deletions(-)
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/CsvBulkLoadToolIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/CsvBulkLoadToolIT.java
index e1c8714..81c2255 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/CsvBulkLoadToolIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/CsvBulkLoadToolIT.java
@@ -139,7 +139,6 @@ public class CsvBulkLoadToolIT extends BaseOwnClusterIT {
stmt.close();
}
-
@Test
public void testImportWithTabs() throws Exception {
@@ -173,6 +172,42 @@ public class CsvBulkLoadToolIT extends BaseOwnClusterIT {
}
@Test
+ public void testImportWithTabsAndEmptyQuotes() throws Exception {
+
+ Statement stmt = conn.createStatement();
+ stmt.execute("CREATE TABLE TABLE8 (ID INTEGER NOT NULL PRIMARY KEY, " +
+ "NAME1 VARCHAR, NAME2 VARCHAR)");
+
+ FileSystem fs = FileSystem.get(getUtility().getConfiguration());
+ FSDataOutputStream outputStream = fs.create(new Path("/tmp/input8.csv"));
+ PrintWriter printWriter = new PrintWriter(outputStream);
+ printWriter.println("1\t\"\\t123\tName 2a");
+ printWriter.println("2\tName 2a\tName 2b");
+ printWriter.close();
+
+ CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
+ csvBulkLoadTool.setConf(getUtility().getConfiguration());
+ int exitCode = csvBulkLoadTool.run(new String[] {
+ "--input", "/tmp/input8.csv",
+ "--table", "table8",
+ "--zookeeper", zkQuorum,
+ "-q", "\"\"",
+ "-e", "\"\"",
+ "--delimiter", "\\t"
+ });
+ assertEquals(0, exitCode);
+
+ ResultSet rs = stmt.executeQuery("SELECT id, name1, name2 FROM table8 ORDER BY id");
+ assertTrue(rs.next());
+ assertEquals(1, rs.getInt(1));
+ assertEquals("\"\\t123", rs.getString(2));
+ assertEquals("Name 2a", rs.getString(3));
+
+ rs.close();
+ stmt.close();
+ }
+
+ @Test
public void testFullOptionImport() throws Exception {
Statement stmt = conn.createStatement();
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkImportUtil.java b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkImportUtil.java
index b992f82..21787b2 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkImportUtil.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkImportUtil.java
@@ -43,8 +43,8 @@ public class CsvBulkImportUtil {
* @param arrayDelimiter array delimiter character, can be null
* @param binaryEncoding
*/
- public static void initCsvImportJob(Configuration conf, char fieldDelimiter, char quoteChar,
- char escapeChar, String arrayDelimiter, String binaryEncoding) {
+ public static void initCsvImportJob(Configuration conf, char fieldDelimiter, Character quoteChar,
+ Character escapeChar, String arrayDelimiter, String binaryEncoding) {
setChar(conf, CsvToKeyValueMapper.FIELD_DELIMITER_CONFKEY, fieldDelimiter);
setChar(conf, CsvToKeyValueMapper.QUOTE_CHAR_CONFKEY, quoteChar);
setChar(conf, CsvToKeyValueMapper.ESCAPE_CHAR_CONFKEY, escapeChar);
@@ -69,8 +69,11 @@ public class CsvBulkImportUtil {
}
@VisibleForTesting
- static void setChar(Configuration conf, String confKey, char charValue) {
- conf.set(confKey, Bytes.toString(Base64.getEncoder().encode(Character.toString(charValue).getBytes())));
+
+ static void setChar(Configuration conf, String confKey, Character charValue) {
+ if(charValue!=null) {
+ conf.set(confKey, Bytes.toString(Base64.getEncoder().encode(charValue.toString().getBytes())));
+ }
}
@VisibleForTesting
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkLoadTool.java b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkLoadTool.java
index 8daf5d3..7e0a6fd 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkLoadTool.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkLoadTool.java
@@ -55,31 +55,39 @@ public class CsvBulkLoadTool extends AbstractBulkLoadTool {
// we don't parse ZK_QUORUM_OPT here because we need it in order to
// create the connection we need to build importColumns.
- char delimiterChar = ',';
+ Character delimiterChar = ',';
if (cmdLine.hasOption(DELIMITER_OPT.getOpt())) {
- String delimString = StringEscapeUtils.unescapeJava(cmdLine.getOptionValue(DELIMITER_OPT.getOpt()));
+ String delimString = StringEscapeUtils.unescapeJava(cmdLine.getOptionValue
+ (DELIMITER_OPT.getOpt()));
if (delimString.length() != 1) {
throw new IllegalArgumentException("Illegal delimiter character: " + delimString);
}
delimiterChar = delimString.charAt(0);
}
- char quoteChar = '"';
+ Character quoteChar = '"';
if (cmdLine.hasOption(QUOTE_OPT.getOpt())) {
- String quoteString = cmdLine.getOptionValue(QUOTE_OPT.getOpt());
- if (quoteString.length() != 1) {
+ String quoteString = StringEscapeUtils.unescapeJava(cmdLine.getOptionValue(QUOTE_OPT
+ .getOpt()));
+ if(quoteString.length() == 0) {
+ quoteChar = null;
+ } else if (quoteString.length() != 1) {
throw new IllegalArgumentException("Illegal quote character: " + quoteString);
+ } else {
+ quoteChar = quoteString.charAt(0);
}
- quoteChar = quoteString.charAt(0);
}
- char escapeChar = '\\';
+ Character escapeChar = '\\';
if (cmdLine.hasOption(ESCAPE_OPT.getOpt())) {
String escapeString = cmdLine.getOptionValue(ESCAPE_OPT.getOpt());
- if (escapeString.length() != 1) {
+ if(escapeString.length() == 0) {
+ escapeChar = null;
+ } else if (escapeString.length() != 1) {
throw new IllegalArgumentException("Illegal escape character: " + escapeString);
+ } else {
+ escapeChar = escapeString.charAt(0);
}
- escapeChar = escapeString.charAt(0);
}
String binaryEncoding = null;
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvToKeyValueMapper.java b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvToKeyValueMapper.java
index cd86bdc..8c28c3c 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvToKeyValueMapper.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvToKeyValueMapper.java
@@ -92,7 +92,7 @@ public class CsvToKeyValueMapper extends FormatToBytesWritableMapper<CSVRecord>
static class CsvLineParser implements LineParser<CSVRecord> {
private final CSVFormat csvFormat;
- CsvLineParser(char fieldDelimiter, char quote, char escape) {
+ CsvLineParser(Character fieldDelimiter, Character quote, Character escape) {
this.csvFormat = CSVFormat.DEFAULT
.withIgnoreEmptyLines(true)
.withDelimiter(fieldDelimiter)