You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ch...@apache.org on 2016/09/07 12:08:28 UTC
[1/2] incubator-carbondata git commit: add comment option
Repository: incubator-carbondata
Updated Branches:
refs/heads/master 4aa22415d -> 2af709eb9
add comment option
fix code style
fixed testcase
renew some files
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/4cb3b5e4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/4cb3b5e4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/4cb3b5e4
Branch: refs/heads/master
Commit: 4cb3b5e42aab5e3c4739744c0844fda455a477f7
Parents: 4aa2241
Author: lion-x <xl...@gmail.com>
Authored: Thu Sep 1 19:55:43 2016 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Wed Sep 7 20:07:19 2016 +0800
----------------------------------------------------------------------
.../hadoop/test/util/StoreCreator.java | 4 +++
.../carbondata/spark/load/CarbonLoadModel.java | 30 ++++++++++++++++++++
.../carbondata/spark/load/CarbonLoaderUtil.java | 4 +++
.../spark/util/GlobalDictionaryUtil.scala | 9 ++++++
.../org/apache/spark/sql/CarbonSqlParser.scala | 2 +-
.../execution/command/carbonTableSchema.scala | 3 ++
.../spark/src/test/resources/comment.csv | 5 ++++
.../dataload/TestLoadDataWithHiveSyntax.scala | 20 +++++++++++--
.../util/ExternalColumnDictionaryTestCase.scala | 1 +
...GlobalDictionaryUtilConcurrentTestCase.scala | 1 +
.../util/GlobalDictionaryUtilTestCase.scala | 1 +
.../api/dataloader/DataLoadModel.java | 13 +++++++++
.../processing/csvreaderstep/CsvInput.java | 2 ++
.../processing/csvreaderstep/CsvInputMeta.java | 30 ++++++++++++++++++++
.../csvreaderstep/UnivocityCsvParser.java | 3 ++
.../csvreaderstep/UnivocityCsvParserVo.java | 21 ++++++++++++++
.../dataprocessor/DataProcessTaskStatus.java | 14 +++++++++
.../dataprocessor/IDataProcessStatus.java | 4 +++
.../graphgenerator/GraphGenerator.java | 6 ++++
19 files changed, 170 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/hadoop/src/test/java/org/apache/carbondata/hadoop/test/util/StoreCreator.java
----------------------------------------------------------------------
diff --git a/hadoop/src/test/java/org/apache/carbondata/hadoop/test/util/StoreCreator.java b/hadoop/src/test/java/org/apache/carbondata/hadoop/test/util/StoreCreator.java
index a48e6ad..5867160 100644
--- a/hadoop/src/test/java/org/apache/carbondata/hadoop/test/util/StoreCreator.java
+++ b/hadoop/src/test/java/org/apache/carbondata/hadoop/test/util/StoreCreator.java
@@ -353,6 +353,8 @@ public class StoreCreator {
GraphGenerator.blockInfo.put("qwqwq", new BlockDetails[] { blockDetails });
schmaModel.setBlocksID("qwqwq");
schmaModel.setEscapeCharacter("\\");
+ schmaModel.setQuoteCharacter("\"");
+ schmaModel.setCommentCharacter("#");
info.setDatabaseName(databaseName);
info.setTableName(tableName);
@@ -460,6 +462,8 @@ public class StoreCreator {
model.setBlocksID(schmaModel.getBlocksID());
model.setFactTimeStamp(readCurrentTime());
model.setEscapeCharacter(schmaModel.getEscapeCharacter());
+ model.setQuoteCharacter(schmaModel.getQuoteCharacter());
+ model.setCommentCharacter(schmaModel.getCommentCharacter());
if (null != loadMetadataDetails && !loadMetadataDetails.isEmpty()) {
model.setLoadNames(
CarbonDataProcessorUtil.getLoadNameFromLoadMetaDataDetails(loadMetadataDetails));
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoadModel.java
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoadModel.java b/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoadModel.java
index c1a073d..68f3929 100644
--- a/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoadModel.java
+++ b/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoadModel.java
@@ -97,6 +97,16 @@ public class CarbonLoadModel implements Serializable {
private String escapeChar;
/**
+ * quote Char
+ */
+ private String quoteChar;
+
+ /**
+ * comment Char
+ */
+ private String commentChar;
+
+ /**
* defines the string that should be treated as null while loadind data
*/
private String serializationNullFormat;
@@ -322,6 +332,8 @@ public class CarbonLoadModel implements Serializable {
copy.segmentId = segmentId;
copy.serializationNullFormat = serializationNullFormat;
copy.escapeChar = escapeChar;
+ copy.quoteChar = quoteChar;
+ copy.commentChar = commentChar;
copy.maxColumns = maxColumns;
return copy;
}
@@ -361,6 +373,8 @@ public class CarbonLoadModel implements Serializable {
copyObj.segmentId = segmentId;
copyObj.serializationNullFormat = serializationNullFormat;
copyObj.escapeChar = escapeChar;
+ copyObj.quoteChar = quoteChar;
+ copyObj.commentChar = commentChar;
copyObj.maxColumns = maxColumns;
return copyObj;
}
@@ -531,6 +545,22 @@ public class CarbonLoadModel implements Serializable {
this.serializationNullFormat = serializationNullFormat;
}
+ public String getQuoteChar() {
+ return quoteChar;
+ }
+
+ public void setQuoteChar(String quoteChar) {
+ this.quoteChar = quoteChar;
+ }
+
+ public String getCommentChar() {
+ return commentChar;
+ }
+
+ public void setCommentChar(String commentChar) {
+ this.commentChar = commentChar;
+ }
+
/**
* @return
*/
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java b/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java
index 246597c..a8302c0 100644
--- a/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java
+++ b/integration/spark/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java
@@ -126,6 +126,8 @@ public final class CarbonLoaderUtil {
}
model.setBlocksID(schmaModel.getBlocksID());
model.setEscapeCharacter(schmaModel.getEscapeCharacter());
+ model.setQuoteCharacter(schmaModel.getQuoteCharacter());
+ model.setCommentCharacter(schmaModel.getCommentCharacter());
model.setTaskNo(loadModel.getTaskNo());
model.setFactTimeStamp(loadModel.getFactTimeStamp());
model.setMaxColumns(loadModel.getMaxColumns());
@@ -180,6 +182,8 @@ public final class CarbonLoaderUtil {
schmaModel.setBlocksID(loadModel.getBlocksID());
schmaModel.setEscapeCharacter(loadModel.getEscapeChar());
+ schmaModel.setQuoteCharacter(loadModel.getQuoteChar());
+ schmaModel.setCommentCharacter(loadModel.getCommentChar());
SchemaInfo info = new SchemaInfo();
info.setDatabaseName(databaseName);
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala
index 3580810..02b70d0 100644
--- a/integration/spark/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala
+++ b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala
@@ -365,6 +365,15 @@ object GlobalDictionaryUtil extends Logging {
.option("ignoreLeadingWhiteSpace", "false")
.option("ignoreTrailingWhiteSpace", "false")
.option("codec", "gzip")
+ .option("quote", {
+ if (StringUtils.isEmpty(carbonLoadModel.getQuoteChar)) {
+ "" + CSVWriter. DEFAULT_QUOTE_CHARACTER
+ }
+ else {
+ carbonLoadModel.getQuoteChar
+ }
+ })
+ .option("comment", carbonLoadModel.getCommentChar)
.load(carbonLoadModel.getFactFilePath)
df
}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
index 3bc5f5c..f2df810 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
@@ -997,7 +997,7 @@ class CarbonSqlParser()
val supportedOptions = Seq("DELIMITER", "QUOTECHAR", "FILEHEADER", "ESCAPECHAR", "MULTILINE",
"COMPLEX_DELIMITER_LEVEL_1", "COMPLEX_DELIMITER_LEVEL_2", "COLUMNDICT",
"SERIALIZATION_NULL_FORMAT",
- "ALL_DICTIONARY_PATH", "MAXCOLUMNS"
+ "ALL_DICTIONARY_PATH", "MAXCOLUMNS", "COMMENTCHAR"
)
var isSupported = true
val invalidOptions = StringBuilder.newBuilder
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
index 1e06165..2047872 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
@@ -1091,6 +1091,7 @@ private[sql] case class LoadTable(
val quoteChar = partionValues.getOrElse("quotechar", "\"")
val fileHeader = partionValues.getOrElse("fileheader", "")
val escapeChar = partionValues.getOrElse("escapechar", "\\")
+ val commentchar = partionValues.getOrElse("commentchar", "#")
val columnDict = partionValues.getOrElse("columndict", null)
val serializationNullFormat = partionValues.getOrElse("serialization_null_format", "\\N")
val allDictionaryPath = partionValues.getOrElse("all_dictionary_path", "")
@@ -1107,6 +1108,8 @@ private[sql] case class LoadTable(
val maxColumns = partionValues.getOrElse("maxcolumns", null)
carbonLoadModel.setMaxColumns(maxColumns)
carbonLoadModel.setEscapeChar(escapeChar)
+ carbonLoadModel.setQuoteChar(quoteChar)
+ carbonLoadModel.setCommentChar(commentchar)
carbonLoadModel.setSerializationNullFormat("serialization_null_format" + "," +
serializationNullFormat)
if (delimiter.equalsIgnoreCase(complex_delimiter_level_1) ||
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/test/resources/comment.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/comment.csv b/integration/spark/src/test/resources/comment.csv
new file mode 100644
index 0000000..34ccb12
--- /dev/null
+++ b/integration/spark/src/test/resources/comment.csv
@@ -0,0 +1,5 @@
+.~carbon,.,~carbon,~carbon,~carbon,~carbon,~carbon,~carbon,~carbon
+,carbon,,carbon,,carbon,,carbon,,carbon,,carbon,,carbon,,carbon
+#?carbon,#carbon,#carbon,#carbon,#carbon,#carbon,#carbon,#carbon
+?carbon,#carbon,#carbon,#carbon,#carbon,#carbon,#carbon,#carbon
+".carbon,"carbon,"carbon,"carbon,"carbon,"carbon,"carbon,"carbon
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
index da64b39..0cc2b9f 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
@@ -385,7 +385,7 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll {
sql(
"""
LOAD DATA LOCAL INPATH './src/test/resources/datawithescapecharacter.csv' into table t3
- options ('DELIMITER'=',', 'QUOTECHAR'='\"','ESCAPECHAR'='\')
+ options ('DELIMITER'=',', 'QUOTECHAR'='"','ESCAPECHAR'='\')
"""
)
checkAnswer(sql("select count(*) from t3"), Seq(Row(21)))
@@ -406,7 +406,7 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll {
sql(
"""
LOAD DATA LOCAL INPATH './src/test/resources/datawithescapecharacter.csv' into table t3
- options ('DELIMITER'=',', 'QUOTECHAR'='\"','ESCAPECHAR'='@')
+ options ('DELIMITER'=',', 'QUOTECHAR'='"','ESCAPECHAR'='@')
"""
)
checkAnswer(sql("select count(*) from t3"), Seq(Row(21)))
@@ -590,6 +590,21 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll {
checkAnswer(sql("select * from carbontable1"), sql("select * from hivetable1"))
}
+ test("test data loading with comment option") {
+ sql("drop table if exists comment_test")
+ sql(
+ "create table comment_test(imei string, age int, task bigint, num double, level decimal(10," +
+ "3), productdate timestamp, mark int, name string) STORED BY 'org.apache.carbondata.format'"
+ )
+ sql(
+ "LOAD DATA local inpath './src/test/resources/comment.csv' INTO TABLE comment_test " +
+ "options('DELIMITER' = ',', 'QUOTECHAR' = '.', 'COMMENTCHAR' = '?','FILEHEADER'='imei,age,task,num,level,productdate,mark,name')"
+ )
+ checkAnswer(sql("select imei from comment_test"),Seq(Row("\".carbon"),Row("#?carbon"), Row(""),
+ Row("~carbon,")))
+ }
+
+
override def afterAll {
sql("drop table carbontable")
sql("drop table hivetable")
@@ -597,5 +612,6 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll {
sql("drop table if exists mixed_header_test")
sql("drop table carbontable1")
sql("drop table hivetable1")
+ sql("drop table if exists comment_test")
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala
index a2685b1..44ca85e 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/ExternalColumnDictionaryTestCase.scala
@@ -140,6 +140,7 @@ class ExternalColumnDictionaryTestCase extends QueryTest with BeforeAndAfterAll
carbonLoadModel.setComplexDelimiterLevel1("\\$")
carbonLoadModel.setComplexDelimiterLevel2("\\:")
carbonLoadModel.setColDictFilePath(extColFilePath)
+ carbonLoadModel.setQuoteChar("\"");
carbonLoadModel
}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala
index 8468090..4108abe 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilConcurrentTestCase.scala
@@ -70,6 +70,7 @@ class GlobalDictionaryUtilConcurrentTestCase extends QueryTest with BeforeAndAft
carbonLoadModel.setComplexDelimiterLevel1("\\$")
carbonLoadModel.setComplexDelimiterLevel2("\\:")
carbonLoadModel.setStorePath(relation.tableMeta.storePath)
+ carbonLoadModel.setQuoteChar("\"")
carbonLoadModel
}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala
index 05c2715..32beeee 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtilTestCase.scala
@@ -69,6 +69,7 @@ class GlobalDictionaryUtilTestCase extends QueryTest with BeforeAndAfterAll {
carbonLoadModel.setComplexDelimiterLevel1("\\$")
carbonLoadModel.setComplexDelimiterLevel2("\\:")
carbonLoadModel.setStorePath(relation.tableMeta.storePath)
+ carbonLoadModel.setQuoteChar("\"")
carbonLoadModel
}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/api/dataloader/DataLoadModel.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/api/dataloader/DataLoadModel.java b/processing/src/main/java/org/apache/carbondata/processing/api/dataloader/DataLoadModel.java
index 239457b..42a8382 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/api/dataloader/DataLoadModel.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/api/dataloader/DataLoadModel.java
@@ -61,6 +61,10 @@ public class DataLoadModel {
private String escapeCharacter;
+ private String quoteCharacter;
+
+ private String commentCharacter;
+
private String maxColumns;
/**
* @return Returns the schemaInfo.
@@ -200,6 +204,15 @@ public class DataLoadModel {
this.escapeCharacter = escapeCharacter;
}
+ public String getQuoteCharacter() { return quoteCharacter; }
+
+ public void setQuoteCharacter(String quoteCharacter) { this.quoteCharacter = quoteCharacter; }
+
+ public String getCommentCharacter() { return commentCharacter; }
+
+ public void setCommentCharacter(String commentCharacter) {
+ this.commentCharacter = commentCharacter;
+ }
/**
* @return
*/
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInput.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInput.java b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInput.java
index c01a682..86ec915 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInput.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInput.java
@@ -454,6 +454,8 @@ public class CsvInput extends BaseStep implements StepInterface {
csvParserVo.setNumberOfColumns(meta.getInputFields().length);
csvParserVo.setEscapeCharacter(meta.getEscapeCharacter());
csvParserVo.setHeaderPresent(meta.isHeaderPresent());
+ csvParserVo.setQuoteCharacter(meta.getQuoteCharacter());
+ csvParserVo.setCommentCharacter(meta.getCommentCharacter());
String maxColumns = meta.getMaxColumns();
if(null != maxColumns) {
csvParserVo.setMaxColumns(Integer.parseInt(maxColumns));
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInputMeta.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInputMeta.java b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInputMeta.java
index 2e0dece..c5b801a 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInputMeta.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/CsvInputMeta.java
@@ -99,6 +99,10 @@ public class CsvInputMeta extends BaseStepMeta
private String escapeCharacter;
+ private String quoteCharacter;
+
+ private String commentCharacter;
+
private String maxColumns;
public CsvInputMeta() {
@@ -121,6 +125,8 @@ public class CsvInputMeta extends BaseStepMeta
blocksID = "";
partitionID = "";
escapeCharacter ="\\";
+ quoteCharacter = "\"";
+ commentCharacter = "#";
}
private void readData(Node stepnode) throws KettleXMLException {
@@ -157,6 +163,8 @@ public class CsvInputMeta extends BaseStepMeta
blocksID = XMLHandler.getTagValue(stepnode, "blocksID");
partitionID = XMLHandler.getTagValue(stepnode, "partitionID");
escapeCharacter = XMLHandler.getTagValue(stepnode, "escapeCharacter");
+ quoteCharacter = XMLHandler.getTagValue(stepnode, "quoteCharacter");
+ commentCharacter = XMLHandler.getTagValue(stepnode, "commentCharacter");
maxColumns = XMLHandler.getTagValue(stepnode, "maxColumns");
Node fields = XMLHandler.getSubNode(stepnode, getXmlCode("FIELDS"));
int nrfields = XMLHandler.countNodes(fields, getXmlCode("FIELD"));
@@ -220,6 +228,8 @@ public class CsvInputMeta extends BaseStepMeta
retval.append(" ").append(XMLHandler.addTagValue("blocksID", blocksID));
retval.append(" ").append(XMLHandler.addTagValue("partitionID", partitionID));
retval.append(" ").append(XMLHandler.addTagValue("escapeCharacter", escapeCharacter));
+ retval.append(" ").append(XMLHandler.addTagValue("quoteCharacter", quoteCharacter));
+ retval.append(" ").append(XMLHandler.addTagValue("commentCharacter", commentCharacter));
retval.append(" ").append(XMLHandler.addTagValue("maxColumns", maxColumns));
retval.append(" ").append(XMLHandler.openTag(getXmlCode("FIELDS"))).append(Const.CR);
for (int i = 0; i < inputFields.length; i++) {
@@ -273,6 +283,8 @@ public class CsvInputMeta extends BaseStepMeta
blocksID = rep.getStepAttributeString(idStep, getRepCode("blocksID"));
partitionID = rep.getStepAttributeString(idStep, getRepCode("partitionID"));
escapeCharacter = rep.getStepAttributeString(idStep, getRepCode("escapeCharacter"));
+ quoteCharacter = rep.getStepAttributeString(idStep, getRepCode("quoteCharacter"));
+ commentCharacter = rep.getStepAttributeString(idStep, getRepCode("commentCharacter"));
maxColumns = rep.getStepAttributeString(idStep, getRepCode("maxColumns"));
int nrfields = rep.countNrStepAttributes(idStep, getRepCode("FIELD_NAME"));
@@ -329,6 +341,10 @@ public class CsvInputMeta extends BaseStepMeta
rep.saveStepAttribute(idTransformation, idStep, getRepCode("partitionID"), partitionID);
rep.saveStepAttribute(idTransformation, idStep, getRepCode("escapeCharacter"),
escapeCharacter);
+ rep.saveStepAttribute(idTransformation, idStep, getRepCode("quoteCharacter"),
+ quoteCharacter);
+ rep.saveStepAttribute(idTransformation, idStep, getRepCode("commentCharacter"),
+ commentCharacter);
rep.saveStepAttribute(idTransformation, idStep, getRepCode("maxColumns"),
maxColumns);
for (int i = 0; i < inputFields.length; i++) {
@@ -623,6 +639,16 @@ public class CsvInputMeta extends BaseStepMeta
this.escapeCharacter = escapeCharacter;
}
+ public String getQuoteCharacter() { return quoteCharacter; }
+
+ public void setQuoteCharacter(String quoteCharacter) { this.quoteCharacter = quoteCharacter; }
+
+ public String getCommentCharacter() { return commentCharacter; }
+
+ public void setCommentCharacter(String commentCharacter) {
+ this.commentCharacter = commentCharacter;
+ }
+
public String getFileType() {
return "CSV";
}
@@ -828,6 +854,10 @@ public class CsvInputMeta extends BaseStepMeta
partitionID = (String) entry.getValue();
} else if ("escapeCharacter".equals(attributeKey)) {
escapeCharacter = (String) entry.getValue();
+ } else if ("quoteCharacter".equals(attributeKey)) {
+ quoteCharacter = (String) entry.getValue();
+ } else if ("commentCharacter".equals(attributeKey)) {
+ commentCharacter = (String) entry.getValue();
} else {
throw new RuntimeException(
"Unhandled metadata injection of attribute: " + attr.toString() + " - " + attr
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java
index 49f17dc..89eec54 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParser.java
@@ -93,6 +93,7 @@ public class UnivocityCsvParser {
public void initialize() throws IOException {
CsvParserSettings parserSettings = new CsvParserSettings();
parserSettings.getFormat().setDelimiter(csvParserVo.getDelimiter().charAt(0));
+ parserSettings.getFormat().setComment(csvParserVo.getCommentCharacter().charAt(0));
parserSettings.setLineSeparatorDetectionEnabled(true);
parserSettings.setMaxColumns(
getMaxColumnsForParsing(csvParserVo.getNumberOfColumns(), csvParserVo.getMaxColumns()));
@@ -100,6 +101,8 @@ public class UnivocityCsvParser {
parserSettings.setIgnoreLeadingWhitespaces(false);
parserSettings.setIgnoreTrailingWhitespaces(false);
parserSettings.setSkipEmptyLines(false);
+ parserSettings.getFormat().setQuote(null == csvParserVo.getQuoteCharacter() ?
+ '\"':csvParserVo.getQuoteCharacter().charAt(0));
parserSettings.getFormat().setQuoteEscape(null == csvParserVo.getEscapeCharacter() ?
'\\' :
csvParserVo.getEscapeCharacter().charAt(0));
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParserVo.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParserVo.java b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParserVo.java
index 623cac3..5383309 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParserVo.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/csvreaderstep/UnivocityCsvParserVo.java
@@ -71,6 +71,16 @@ public class UnivocityCsvParserVo {
private String escapeCharacter;
/**
+ * quote character;
+ */
+ private String quoteCharacter;
+
+ /**
+ * comment character;
+ */
+ private String commentCharacter;
+
+ /**
* max number of columns configured by user to be parsed in a row
*/
private int maxColumns;
@@ -187,6 +197,17 @@ public class UnivocityCsvParserVo {
this.escapeCharacter = escapeCharacter;
}
+ public String getQuoteCharacter() { return quoteCharacter; }
+
+ public void setQuoteCharacter(String quoteCharacter) { this.quoteCharacter = quoteCharacter; }
+
+ public String getCommentCharacter() { return commentCharacter; }
+
+ public void setCommentCharacter(String commentCharacter) {
+ this.commentCharacter = commentCharacter;
+ }
+
+
/**
* @return
*/
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/DataProcessTaskStatus.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/DataProcessTaskStatus.java b/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/DataProcessTaskStatus.java
index bc09f89..a0fb157 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/DataProcessTaskStatus.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/DataProcessTaskStatus.java
@@ -87,6 +87,10 @@ public class DataProcessTaskStatus implements IDataProcessStatus, Serializable {
private String escapeCharacter;
+ private String quoteCharacter;
+
+ private String commentCharacter;
+
public DataProcessTaskStatus(String databaseName, String tableName) {
this.databaseName = databaseName;
this.tableName = tableName;
@@ -283,4 +287,14 @@ public class DataProcessTaskStatus implements IDataProcessStatus, Serializable {
public void setEscapeCharacter(String escapeCharacter) {
this.escapeCharacter = escapeCharacter;
}
+
+ public String getQuoteCharacter() { return quoteCharacter; }
+
+ public void setQuoteCharacter(String quoteCharacter) { this.quoteCharacter = quoteCharacter; }
+
+ public String getCommentCharacter() { return commentCharacter; }
+
+ public void setCommentCharacter(String commentCharacter) {
+ this.commentCharacter = commentCharacter;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/IDataProcessStatus.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/IDataProcessStatus.java b/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/IDataProcessStatus.java
index 22cada5..56dfe00 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/IDataProcessStatus.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/dataprocessor/IDataProcessStatus.java
@@ -189,4 +189,8 @@ public interface IDataProcessStatus {
String getBlocksID();
String getEscapeCharacter();
+
+ String getQuoteCharacter();
+
+ String getCommentCharacter();
}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cb3b5e4/processing/src/main/java/org/apache/carbondata/processing/graphgenerator/GraphGenerator.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/graphgenerator/GraphGenerator.java b/processing/src/main/java/org/apache/carbondata/processing/graphgenerator/GraphGenerator.java
index f55c247..b10841f 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/graphgenerator/GraphGenerator.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/graphgenerator/GraphGenerator.java
@@ -185,6 +185,8 @@ public class GraphGenerator {
private String factStoreLocation;
private String blocksID;
private String escapeCharacter;
+ private String quoteCharacter;
+ private String commentCharacter;
/**
* task id, each spark task has a unique id
*/
@@ -219,6 +221,8 @@ public class GraphGenerator {
this.isColumnar = Boolean.parseBoolean(CarbonCommonConstants.IS_COLUMNAR_STORAGE_DEFAULTVALUE);
this.blocksID = dataLoadModel.getBlocksID();
this.taskNo = dataLoadModel.getTaskNo();
+ this.quoteCharacter = dataLoadModel.getQuoteCharacter();
+ this.commentCharacter = dataLoadModel.getCommentCharacter();
this.factTimeStamp = dataLoadModel.getFactTimeStamp();
this.segmentId = segmentId;
this.escapeCharacter = dataLoadModel.getEscapeCharacter();
@@ -450,6 +454,8 @@ public class GraphGenerator {
csvInputMeta.setBlocksID(this.blocksID);
csvInputMeta.setPartitionID(this.partitionID);
csvInputMeta.setEscapeCharacter(this.escapeCharacter);
+ csvInputMeta.setQuoteCharacter(this.quoteCharacter);
+ csvInputMeta.setCommentCharacter(this.commentCharacter);
csvDataStep.setDraw(true);
csvDataStep.setDescription("Read raw data from " + GraphGeneratorConstants.CSV_INPUT);
[2/2] incubator-carbondata git commit: [CARBONDATA-201] add comment option. This closes #118
Posted by ch...@apache.org.
[CARBONDATA-201] add comment option. This closes #118
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/2af709eb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/2af709eb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/2af709eb
Branch: refs/heads/master
Commit: 2af709eb9b5ec438432a819cb5a30b82ffcb32c6
Parents: 4aa2241 4cb3b5e
Author: chenliang613 <ch...@apache.org>
Authored: Wed Sep 7 20:08:07 2016 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Wed Sep 7 20:08:07 2016 +0800
----------------------------------------------------------------------
.../hadoop/test/util/StoreCreator.java | 4 +++
.../carbondata/spark/load/CarbonLoadModel.java | 30 ++++++++++++++++++++
.../carbondata/spark/load/CarbonLoaderUtil.java | 4 +++
.../spark/util/GlobalDictionaryUtil.scala | 9 ++++++
.../org/apache/spark/sql/CarbonSqlParser.scala | 2 +-
.../execution/command/carbonTableSchema.scala | 3 ++
.../spark/src/test/resources/comment.csv | 5 ++++
.../dataload/TestLoadDataWithHiveSyntax.scala | 20 +++++++++++--
.../util/ExternalColumnDictionaryTestCase.scala | 1 +
...GlobalDictionaryUtilConcurrentTestCase.scala | 1 +
.../util/GlobalDictionaryUtilTestCase.scala | 1 +
.../api/dataloader/DataLoadModel.java | 13 +++++++++
.../processing/csvreaderstep/CsvInput.java | 2 ++
.../processing/csvreaderstep/CsvInputMeta.java | 30 ++++++++++++++++++++
.../csvreaderstep/UnivocityCsvParser.java | 3 ++
.../csvreaderstep/UnivocityCsvParserVo.java | 21 ++++++++++++++
.../dataprocessor/DataProcessTaskStatus.java | 14 +++++++++
.../dataprocessor/IDataProcessStatus.java | 4 +++
.../graphgenerator/GraphGenerator.java | 6 ++++
19 files changed, 170 insertions(+), 3 deletions(-)
----------------------------------------------------------------------