Posted to commits@carbondata.apache.org by aj...@apache.org on 2020/02/12 05:00:13 UTC

[carbondata] branch master updated: [HOTFIX] Allow space between input file paths

This is an automated email from the ASF dual-hosted git repository.

ajantha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 7661750  [HOTFIX] Allow space between input file paths
7661750 is described below

commit 766175062523a7981bffaee25d26023c886c6d61
Author: Manhua <ke...@qq.com>
AuthorDate: Wed Feb 5 16:05:58 2020 +0800

    [HOTFIX] Allow space between input file paths
    
    Why is this PR needed?
    When a space exists between file paths, the error "The input file
    does not exist" is thrown, for example with:
    
    LOAD DATA INPATH 'hdfs:///data/file1.dat , hdfs:///data/file2.dat' INTO
    TABLE ...
    
    What changes were proposed in this PR?
    Trim the surrounding spaces from each file path before use; a
    standalone sketch of this change follows the commit message.
    
    Does this PR introduce any user interface change?
    No
    
    Is any new testcase added?
    Yes
    
    This closes #3604
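
A minimal standalone sketch of the behavior (illustrative only: the
PathSplitSketch object below is hypothetical and is not CarbonData's
actual FileUtils, it just mirrors the one-line change in the diff
below). Splitting on ',' alone keeps the spaces around the comma, so
" hdfs:///data/file2.dat" is passed verbatim to the existence check
and fails; mapping trim over the parts removes them:

    // Illustrative only; mirrors the split-and-trim change in
    // org.apache.spark.util.FileUtils but is not that class.
    object PathSplitSketch {

      // Before the fix: split on ',' only, so spaces around the
      // comma survive in the resulting paths.
      def splitPathsOld(inputPath: String): Array[String] =
        inputPath.split(",")

      // After the fix: trim each piece so "a.dat , b.dat" yields
      // clean paths.
      def splitPathsNew(inputPath: String): Array[String] =
        inputPath.split(",").map(_.trim)

      def main(args: Array[String]): Unit = {
        val input = "hdfs:///data/file1.dat , hdfs:///data/file2.dat"
        // Old behavior, spaces kept:
        //   [hdfs:///data/file1.dat ] [ hdfs:///data/file2.dat]
        println(splitPathsOld(input).map(p => s"[$p]").mkString(" "))
        // New behavior, spaces trimmed:
        //   [hdfs:///data/file1.dat] [hdfs:///data/file2.dat]
        println(splitPathsNew(input).map(p => s"[$p]").mkString(" "))
      }
    }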
---
 .../dataload/MultiFilesDataLoagdingTestCase.scala  | 22 ++++++++++++++++------
 .../scala/org/apache/spark/util/FileUtils.scala    |  2 +-
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/MultiFilesDataLoagdingTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/MultiFilesDataLoagdingTestCase.scala
index 14a0d5c..73d57fd 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/MultiFilesDataLoagdingTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/MultiFilesDataLoagdingTestCase.scala
@@ -19,16 +19,17 @@ package org.apache.carbondata.spark.testsuite.dataload
 
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.test.util.QueryTest
-import org.scalatest.BeforeAndAfterAll
+import org.scalatest.BeforeAndAfterEach
 
 /**
  * Test Class for data loading with hive syntax and old syntax
  *
  */
-class MultiFilesDataLoagdingTestCase extends QueryTest with BeforeAndAfterAll {
+class MultiFilesDataLoagdingTestCase extends QueryTest with BeforeAndAfterEach {
 
-  override def beforeAll {
-    sql("create table multifile(empno int, empname String, designation string, doj String," +
+  override def beforeEach {
+    sql("DROP TABLE IF EXISTS multifile")
+    sql("CREATE TABLE multifile(empno int, empname String, designation string, doj String," +
       "workgroupcategory int, workgroupcategoryname String,deptno int, deptname String," +
       "projectcode int, projectjoindate String,projectenddate String, attendance double," +
       "utilization double,salary double) STORED AS carbondata")
@@ -43,7 +44,16 @@ class MultiFilesDataLoagdingTestCase extends QueryTest with BeforeAndAfterAll {
     )
   }
 
-  override def afterAll {
-    sql("drop table multifile")
+  test("test data loading multiple files") {
+    val testData = s"$resourcesPath/loadMultiFiles/data.csv, $resourcesPath/loadMultiFiles/non-csv"
+    sql(s"LOAD DATA LOCAL INPATH '$testData' into table multifile")
+    checkAnswer(
+      sql("select count(empno) from multifile"),
+      Seq(Row(5))
+    )
+  }
+
+  override def afterEach {
+    sql("DROP TABLE IF EXISTS multifile")
   }
 }
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/util/FileUtils.scala b/integration/spark-common/src/main/scala/org/apache/spark/util/FileUtils.scala
index 4186dcb..c37b10a 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/util/FileUtils.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/util/FileUtils.scala
@@ -73,7 +73,7 @@ object FileUtils {
       throw new DataLoadingException("Input file path cannot be empty.")
     } else {
       val stringBuild = new StringBuilder()
-      val filePaths = inputPath.split(",")
+      val filePaths = inputPath.split(",").map(_.trim)
       for (i <- 0 until filePaths.size) {
         val filePath = CarbonUtil.checkAndAppendHDFSUrl(filePaths(i))
         val carbonFile = FileFactory.getCarbonFile(filePath, hadoopConf)