Posted to commits@carbondata.apache.org by ch...@apache.org on 2016/07/20 10:14:03 UTC

[35/50] [abbrv] incubator-carbondata git commit: [CARBONDATA-65][Bug] Data load fails if there are spaces in the FILEHEADER option of the load command (#834)

[CARBONDATA-65][Bug] Data load fails if there are spaces in the FILEHEADER option of the load command (#834)

Data load fails if there are spaces in the header names given in the FILEHEADER option of the load command.
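
For illustration only (not part of this commit's code): a minimal Scala sketch of the failure mode and of the trim-based fix. The FILEHEADER value is copied from the updated test below; the contains-based check is a simplified stand-in for CarbonData's actual column matching.

object FileHeaderTrimExample {
  def main(args: Array[String]): Unit = {
    // FILEHEADER value containing stray spaces, copied from the updated test case
    val fileHeader = "EMPno, empname,designation,doj," +
      "workgroupcategory,workgroupcategoryname,   deptno,deptname,projectcode,projectjoindate," +
      "projectenddate,  attendance,   utilization,SALARY"

    // Splitting alone keeps the leading/trailing spaces in the header names
    val rawHeaders: Array[String] = fileHeader.split(",")
    // Trimming each name, which is what the fix adds in GlobalDictionaryUtil
    val headers: Array[String] = rawHeaders.map(_.trim)

    // A schema column such as "empname" only matches the trimmed form
    println(rawHeaders.contains("empname")) // false: why the load used to fail
    println(headers.contains("empname"))    // true: matches after trimming
  }
}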

Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/df03dbbd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/df03dbbd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/df03dbbd

Branch: refs/heads/master
Commit: df03dbbd2077c3845537b375c4659dc5bf96020a
Parents: 31d824d
Author: manishgupta88 <to...@gmail.com>
Authored: Mon Jul 18 18:23:11 2016 +0530
Committer: Venkata Ramana G <g....@gmail.com>
Committed: Mon Jul 18 18:23:11 2016 +0530

----------------------------------------------------------------------
 .../spark/util/GlobalDictionaryUtil.scala       |  3 +-
 .../dataload/TestLoadDataWithHiveSyntax.scala   |  6 ++--
 .../processing/csvload/DataGraphExecuter.java   | 35 --------------------
 .../processing/csvload/GraphExecutionUtil.java  | 28 ----------------
 4 files changed, 5 insertions(+), 67 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/df03dbbd/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala b/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
index 25093f8..8ad1204 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
@@ -674,12 +674,13 @@ object GlobalDictionaryUtil extends Logging {
         logInfo("Generate global dictionary from source data files!")
         // load data by using dataSource com.databricks.spark.csv
         var df = loadDataFrame(sqlContext, carbonLoadModel)
-        val headers = if (StringUtils.isEmpty(carbonLoadModel.getCsvHeader)) {
+        var headers = if (StringUtils.isEmpty(carbonLoadModel.getCsvHeader)) {
           df.columns
         }
         else {
           carbonLoadModel.getCsvHeader.split("" + CSVWriter.DEFAULT_SEPARATOR)
         }
+        headers = headers.map(headerName => headerName.trim)
         val colDictFilePath = carbonLoadModel.getColDictFilePath
         if (colDictFilePath != null) {
           // generate predefined dictionary
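
Condensed from the hunk above, a hedged sketch of the resulting header resolution: whichever branch supplies the names, each one is trimmed before dictionary generation. resolveHeaders, csvHeader and dfColumns are illustrative names rather than CarbonData APIs, and the plain "," stands in for "" + CSVWriter.DEFAULT_SEPARATOR from opencsv.

object HeaderResolutionSketch {
  // Resolve header names from the FILEHEADER value when present, otherwise from the
  // DataFrame's column names, trimming each name either way (mirrors the change above).
  def resolveHeaders(csvHeader: String, dfColumns: Array[String]): Array[String] = {
    val headers =
      if (csvHeader == null || csvHeader.isEmpty) dfColumns
      else csvHeader.split(",")
    headers.map(_.trim)
  }

  def main(args: Array[String]): Unit = {
    // Header names with stray spaces now resolve to clean column names
    println(resolveHeaders("EMPno, empname,   deptno", Array.empty).mkString("|"))
    // prints: EMPno|empname|deptno
  }
}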

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/df03dbbd/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
index 73d3a32..df75c4b 100644
--- a/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
+++ b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntax.scala
@@ -109,9 +109,9 @@ class TestLoadDataWithHiveSyntax extends QueryTest with BeforeAndAfterAll {
     //load data into test cube and hive table and validate query result
     sql(
       "LOAD DATA local inpath './src/test/resources/datawithoutheader.csv' INTO table testtable1 " +
-        "options('DELIMITER'=',', 'QUOTECHAR'='\"', 'FILEHEADER'='EMPno,empname,designation,doj," +
-        "workgroupcategory,workgroupcategoryname,deptno,deptname,projectcode,projectjoindate," +
-        "projectenddate,attendance,utilization,SALARY')"
+        "options('DELIMITER'=',', 'QUOTECHAR'='\"', 'FILEHEADER'='EMPno, empname,designation,doj," +
+        "workgroupcategory,workgroupcategoryname,   deptno,deptname,projectcode,projectjoindate," +
+        "projectenddate,  attendance,   utilization,SALARY')"
     )
     sql(
       "LOAD DATA local inpath './src/test/resources/datawithoutheader.csv' overwrite INTO table " +

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/df03dbbd/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java b/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
index ca21ec1..91f5aee 100644
--- a/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
+++ b/processing/src/main/java/org/carbondata/processing/csvload/DataGraphExecuter.java
@@ -81,17 +81,6 @@ public class DataGraphExecuter {
   }
 
   /**
-   * This method check whether then CSV file has header or not.
-   *
-   * @param columnNames
-   * @param csvFilePath
-   * @return
-   */
-  private boolean checkHeaderExist(String[] columnNames, String csvFilePath, String delimiter) {
-    return GraphExecutionUtil.checkHeaderExist(csvFilePath, columnNames, delimiter);
-  }
-
-  /**
    * This Method checks whether csv file provided and the column name in schema are same
    * or not
    *
@@ -149,13 +138,6 @@ public class DataGraphExecuter {
               + "CSVFile Name : "
               + f.getName());
     }
-
-    if (!checkHeaderExist(columnNames, f.getAbsolutePath(), delimiter)) {
-      LOGGER.error("Header Columns are not present in the provided CSV File :" + f.getName());
-      throw new DataLoadingException(DataProcessorConstants.CSV_VALIDATION_ERRROR_CODE,
-          "Header Columns are not present in the provided CSV File:" + f.getName());
-
-    }
   }
 
   public void executeGraph(String graphFilePath, List<String> measureColumns, SchemaInfo schemaInfo,
@@ -648,16 +630,6 @@ public class DataGraphExecuter {
               + dimFile.getName());
     }
 
-    if (!checkDimHeaderExist(columnNames, dimFile.getAbsolutePath(), delimiter)) {
-      LOGGER.error(
-          "Header Columns are not present in the provided CSV File For Dimension Table Load :"
-              + dimFile.getName());
-      throw new DataLoadingException(DataProcessorConstants.CSV_VALIDATION_ERRROR_CODE,
-          "Header Columns are not present in the provided CSV File For Dimension Table Load :"
-              + dimFile.getName());
-
-    }
-
   }
 
   /**
@@ -672,13 +644,6 @@ public class DataGraphExecuter {
   }
 
   /**
-   * Check the dimension csv file is having all the dimension.
-   */
-  private boolean checkDimHeaderExist(String[] columnNames, String dimFilePath, String delimiter) {
-    return GraphExecutionUtil.checkHeaderExist(dimFilePath, columnNames, delimiter);
-  }
-
-  /**
    * Interrupts all child threads run by kettle to execute the graph
    */
   public void interruptGraphExecution() {

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/df03dbbd/processing/src/main/java/org/carbondata/processing/csvload/GraphExecutionUtil.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/csvload/GraphExecutionUtil.java b/processing/src/main/java/org/carbondata/processing/csvload/GraphExecutionUtil.java
index 6a66817..b0b74c4 100644
--- a/processing/src/main/java/org/carbondata/processing/csvload/GraphExecutionUtil.java
+++ b/processing/src/main/java/org/carbondata/processing/csvload/GraphExecutionUtil.java
@@ -253,34 +253,6 @@ public final class GraphExecutionUtil {
 
   /**
    * @param csvFilePath
-   * @param columnNames
-   */
-  public static boolean checkHeaderExist(String csvFilePath, String[] columnNames,
-      String delimiter) {
-
-    String readLine = readCSVFile(csvFilePath);
-
-    if (null != readLine) {
-      String[] columnFromCSV = readLine.toLowerCase().split(delimiter);
-
-      List<String> csvColumnsList = new ArrayList<String>(CarbonCommonConstants.CONSTANT_SIZE_TEN);
-
-      for (String column : columnFromCSV) {
-        csvColumnsList.add(column.replaceAll("\"", ""));
-      }
-
-      for (String columns : columnNames) {
-        if (csvColumnsList.contains(columns)) {
-          return true;
-        }
-      }
-    }
-
-    return false;
-  }
-
-  /**
-   * @param csvFilePath
    * @return
    */
   private static String readCSVFile(String csvFilePath) {