You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2016/08/01 10:05:04 UTC
[06/47] incubator-carbondata git commit: [CARBONDATA-94] fixed load
data when csv file record delimiter is '|' and data with header
[CARBONDATA-94] fixed load data when csv file record delimiter is '|' and data with header
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/4cff504c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/4cff504c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/4cff504c
Branch: refs/heads/master
Commit: 4cff504cfb538fb7a3ebf3755df2ea968eb79b8b
Parents: 154a4d3
Author: Gin-zhj <zh...@huawei.com>
Authored: Fri Jul 22 21:18:53 2016 +0800
Committer: Kumar Vishal <ku...@gmail.com>
Committed: Fri Jul 22 18:48:53 2016 +0530
----------------------------------------------------------------------
.../spark/src/test/resources/datadelimiter.csv | 11 +++++++
.../dataload/TestLoadDataWithHiveSyntax.scala | 32 +++++++++++++++++++-
.../processing/csvload/GraphExecutionUtil.java | 31 +++++++++++++++++++
3 files changed, 73 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cff504c/integration/spark/src/test/resources/datadelimiter.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/datadelimiter.csv b/integration/spark/src/test/resources/datadelimiter.csv
new file mode 100644
index 0000000..bc06817
--- /dev/null
+++ b/integration/spark/src/test/resources/datadelimiter.csv
@@ -0,0 +1,11 @@
+empno|empname|designation|doj|workgroupcategory|workgroupcategoryname|deptno|deptname|projectcode|projectjoindate|projectenddate|attendance|utilization|salary
+11|arvind|SE|17-01-2007|1|developer|10|network|928478|17-02-2007|29-11-2016|96|96.2|5040.56
+12|krithin|SSE|29-05-2008|1|developer|11|protocol|928378|29-06-2008|30-12-2016|85|95.1|7124.21
+13|madhan|TPL|07-07-2009|2|tester|10|network|928478|07-08-2009|30-12-2016|88|99|9054.235
+14|anandh|SA|29-12-2010|3|manager|11|protocol|928278|29-01-2011|29-06-2016|77|92.2|11248.25
+15|ayushi|SSA|09-11-2011|1|developer|12|security|928375|09-12-2011|29-05-2016|99|91.5|13245.48
+16|pramod|SE|14-10-2012|1|developer|13|configManagement|928478|14-11-2012|29-12-2016|86|93|5040.56
+17|gawrav|PL|22-09-2013|2|tester|12|security|928778|22-10-2013|15-11-2016|78|97.45|9574.24
+18|sibi|TL|15-08-2014|2|tester|14|Learning|928176|15-09-2014|29-05-2016|84|98.23|7245.25
+19|shivani|PL|12-05-2015|1|developer|10|network|928977|12-06-2015|12-11-2016|88|91.678|11254.24
+20|bill|PM|01-12-2015|3|manager|14|Learning|928479|01-01-2016|30-11-2016|75|94.22|13547.25
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cff504c/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
index df75c4b..99bae17 100644
--- a/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
+++ b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
@@ -47,6 +47,9 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll {
"projectcode int, projectjoindate String,projectenddate String, attendance String," +
"utilization String,salary String)row format delimited fields terminated by ','"
)
+
+ sql("drop table if exists carbontable1")
+ sql("drop table if exists hivetable1")
}
test("test data loading and validate query output") {
@@ -547,11 +550,38 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll {
checkAnswer(sql("select id from t3 where salary = 15000"),Seq(Row(1)))
}
+ test("test data loading when delimiter is '|' and data with header") { // loads a '|'-delimited CSV into a carbon table and compares it with a hive table
+ sql( // carbon table that receives the '|'-delimited file
+ "CREATE table carbontable1 (empno int, empname String, designation String, doj String, " +
+ "workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, " +
+ "projectcode int, projectjoindate String, projectenddate String,attendance double," +
+ "utilization double,salary double) STORED BY 'org.apache.carbondata.format' TBLPROPERTIES" +
+ "('DICTIONARY_EXCLUDE'='empno,empname,designation,doj,workgroupcategory," +
+ "workgroupcategoryname,deptno,deptname,projectcode,projectjoindate,projectenddate')"
+ )
+ sql( // hive table with the same columns, declared as comma-delimited, used as the expected result
+ "create table hivetable1 (empno int, empname String, designation string, doj String, " +
+ "workgroupcategory int, workgroupcategoryname String,deptno int, deptname String, " +
+ "projectcode int, projectjoindate String,projectenddate String, attendance double," +
+ "utilization double,salary double)row format delimited fields terminated by ','"
+ )
+
+ sql( // load under test: explicit '|' delimiter, no explicit header option is passed
+ "LOAD DATA local inpath './src/test/resources/datadelimiter.csv' INTO TABLE carbontable1 OPTIONS" +
+ "('DELIMITER'= '|', 'QUOTECHAR'= '\"')"
+ )
+
+ sql("LOAD DATA local inpath './src/test/resources/datawithoutheader.csv' INTO table hivetable1") // NOTE(review): presumably the same rows, comma-separated and without a header line - confirm against the resource file
+
+ checkAnswer(sql("select * from carbontable1"), sql("select * from hivetable1")) // the carbon result is checked against the hive result
+ }
+
override def afterAll {
sql("drop table carbontable")
sql("drop table hivetable")
sql("drop table if exists header_test")
sql("drop table if exists mixed_header_test")
-
+ sql("drop table carbontable1")
+ sql("drop table hivetable1")
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/4cff504c/processing/src/main/java/org/carbondata/processing/csvload/GraphExecutionUtil.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/csvload/GraphExecutionUtil.java b/processing/src/main/java/org/carbondata/processing/csvload/GraphExecutionUtil.java
index b0b74c4..27a1f5c 100644
--- a/processing/src/main/java/org/carbondata/processing/csvload/GraphExecutionUtil.java
+++ b/processing/src/main/java/org/carbondata/processing/csvload/GraphExecutionUtil.java
@@ -126,6 +126,7 @@ public final class GraphExecutionUtil {
}
if (null != readLine) {
+ delimiter = delimiterConverter(delimiter);
String[] columnNames = readLine.split(delimiter);
TextFileInputField[] textFileInputFields = new TextFileInputField[columnNames.length];
@@ -289,6 +290,7 @@ public final class GraphExecutionUtil {
String readLine = readCSVFile(csvFilePath);
if (null != readLine) {
+ delimiter = delimiterConverter(delimiter);
String[] columnFromCSV = readLine.toLowerCase().split(delimiter);
List<String> csvColumnsList = new ArrayList<String>(CarbonCommonConstants.CONSTANT_SIZE_TEN);
@@ -325,4 +327,33 @@ public final class GraphExecutionUtil {
}
return columnNames;
}
+
+ /**
+ * Escapes a delimiter that is a single regular-expression special
+ * character by prefixing it with a backslash, so that it is matched
+ * literally when the result is used as a regular expression (the callers
+ * visible in this change pass the result to {@code String.split}).
+ * <p>
+ * Only an exact match against one of the one-character strings listed in
+ * the switch is escaped. Every other value is returned unchanged.
+ * <p>
+ * NOTE(review): a multi-character delimiter that contains one of these
+ * characters is returned without escaping - confirm that this is intended.
+ *
+ * @param delimiter the delimiter to convert; must not be null, because a
+ *                  switch on a null String throws NullPointerException
+ * @return the delimiter prefixed with a backslash when it is one of the
+ *         listed special characters, otherwise the delimiter as given
+ */
+ public static String delimiterConverter(String delimiter) {
+ switch (delimiter) {
+ case "|":
+ case "*":
+ case ".":
+ case ":":
+ case "^":
+ case "\\":
+ case"$":
+ case "+":
+ case "?":
+ case "(":
+ case ")":
+ case "{":
+ case "}":
+ case "[":
+ case "]":
+ return "\\" + delimiter;
+ default:
+ return delimiter;
+ }
+ }
}