You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ch...@apache.org on 2016/07/20 10:14:03 UTC
[35/50] [abbrv] incubator-carbondata git commit: [CARBONDATA-65][Bug]
Data load fails if spaces in FILEHEADER option in load command (#834)
[CARBONDATA-65][Bug] Data load fails if spaces in FILEHEADER option in load command (#834)
Data load fails if there are spaces in the header names given in the FILEHEADER option of the load command.
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/df03dbbd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/df03dbbd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/df03dbbd
Branch: refs/heads/master
Commit: df03dbbd2077c3845537b375c4659dc5bf96020a
Parents: 31d824d
Author: manishgupta88 <to...@gmail.com>
Authored: Mon Jul 18 18:23:11 2016 +0530
Committer: Venkata Ramana G <g....@gmail.com>
Committed: Mon Jul 18 18:23:11 2016 +0530
----------------------------------------------------------------------
.../spark/util/GlobalDictionaryUtil.scala | 3 +-
.../dataload/TestLoadDataWithHiveSyntax.scala | 6 ++--
.../processing/csvload/DataGraphExecuter.java | 35 --------------------
.../processing/csvload/GraphExecutionUtil.java | 28 ----------------
4 files changed, 5 insertions(+), 67 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/df03dbbd/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala b/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
index 25093f8..8ad1204 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
@@ -674,12 +674,13 @@ object GlobalDictionaryUtil extends Logging {
logInfo("Generate global dictionary from source data files!")
// load data by using dataSource com.databricks.spark.csv
var df = loadDataFrame(sqlContext, carbonLoadModel)
- val headers = if (StringUtils.isEmpty(carbonLoadModel.getCsvHeader)) {
+ var headers = if (StringUtils.isEmpty(carbonLoadModel.getCsvHeader)) {
df.columns
}
else {
carbonLoadModel.getCsvHeader.split("" + CSVWriter.DEFAULT_SEPARATOR)
}
+ headers = headers.map(headerName => headerName.trim)
val colDictFilePath = carbonLoadModel.getColDictFilePath
if (colDictFilePath != null) {
// generate predefined dictionary
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/df03dbbd/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
index 73d3a32..df75c4b 100644
--- a/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
+++ b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
@@ -109,9 +109,9 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll {
//load data into test cube and hive table and validate query result
sql(
"LOAD DATA local inpath './src/test/resources/datawithoutheader.csv' INTO table testtable1 " +
- "options('DELIMITER'=',', 'QUOTECHAR'='\"', 'FILEHEADER'='EMPno,empname,designation,doj," +
- "workgroupcategory,workgroupcategoryname,deptno,deptname,projectcode,projectjoindate," +
- "projectenddate,attendance,utilization,SALARY')"
+ "options('DELIMITER'=',', 'QUOTECHAR'='\"', 'FILEHEADER'='EMPno, empname,designation,doj," +
+ "workgroupcategory,workgroupcategoryname, deptno,deptname,projectcode,projectjoindate," +
+ "projectenddate, attendance, utilization,SALARY')"
)
sql(
"LOAD DATA local inpath './src/test/resources/datawithoutheader.csv' overwrite INTO table " +
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/df03dbbd/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java b/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
index ca21ec1..91f5aee 100644
--- a/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
+++ b/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
@@ -81,17 +81,6 @@ public class DataGraphExecuter {
}
/**
- * This method check whether then CSV file has header or not.
- *
- * @param columnNames
- * @param csvFilePath
- * @return
- */
- private boolean checkHeaderExist(String[] columnNames, String csvFilePath, String delimiter) {
- return GraphExecutionUtil.checkHeaderExist(csvFilePath, columnNames, delimiter);
- }
-
- /**
* This Method checks whether csv file provided and the column name in schema are same
* or not
*
@@ -149,13 +138,6 @@ public class DataGraphExecuter {
+ "CSVFile Name : "
+ f.getName());
}
-
- if (!checkHeaderExist(columnNames, f.getAbsolutePath(), delimiter)) {
- LOGGER.error("Header Columns are not present in the provided CSV File :" + f.getName());
- throw new DataLoadingException(DataProcessorConstants.CSV_VALIDATION_ERRROR_CODE,
- "Header Columns are not present in the provided CSV File:" + f.getName());
-
- }
}
public void executeGraph(String graphFilePath, List<String> measureColumns, SchemaInfo schemaInfo,
@@ -648,16 +630,6 @@ public class DataGraphExecuter {
+ dimFile.getName());
}
- if (!checkDimHeaderExist(columnNames, dimFile.getAbsolutePath(), delimiter)) {
- LOGGER.error(
- "Header Columns are not present in the provided CSV File For Dimension Table Load :"
- + dimFile.getName());
- throw new DataLoadingException(DataProcessorConstants.CSV_VALIDATION_ERRROR_CODE,
- "Header Columns are not present in the provided CSV File For Dimension Table Load :"
- + dimFile.getName());
-
- }
-
}
/**
@@ -672,13 +644,6 @@ public class DataGraphExecuter {
}
/**
- * Check the dimension csv file is having all the dimension.
- */
- private boolean checkDimHeaderExist(String[] columnNames, String dimFilePath, String delimiter) {
- return GraphExecutionUtil.checkHeaderExist(dimFilePath, columnNames, delimiter);
- }
-
- /**
* Interrupts all child threads run by kettle to execute the graph
*/
public void interruptGraphExecution() {
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/df03dbbd/processing/src/main/java/org/carbondata/processing/csvload/GraphExecutionUtil.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/csvload/GraphExecutionUtil.java b/processing/src/main/java/org/carbondata/processing/csvload/GraphExecutionUtil.java
index 6a66817..b0b74c4 100644
--- a/processing/src/main/java/org/carbondata/processing/csvload/GraphExecutionUtil.java
+++ b/processing/src/main/java/org/carbondata/processing/csvload/GraphExecutionUtil.java
@@ -253,34 +253,6 @@ public final class GraphExecutionUtil {
/**
* @param csvFilePath
- * @param columnNames
- */
- public static boolean checkHeaderExist(String csvFilePath, String[] columnNames,
- String delimiter) {
-
- String readLine = readCSVFile(csvFilePath);
-
- if (null != readLine) {
- String[] columnFromCSV = readLine.toLowerCase().split(delimiter);
-
- List<String> csvColumnsList = new ArrayList<String>(CarbonCommonConstants.CONSTANT_SIZE_TEN);
-
- for (String column : columnFromCSV) {
- csvColumnsList.add(column.replaceAll("\"", ""));
- }
-
- for (String columns : columnNames) {
- if (csvColumnsList.contains(columns)) {
- return true;
- }
- }
- }
-
- return false;
- }
-
- /**
- * @param csvFilePath
* @return
*/
private static String readCSVFile(String csvFilePath) {