Posted to commits@carbondata.apache.org by ja...@apache.org on 2020/04/05 10:42:11 UTC

[carbondata] branch master updated: [CARBONDATA-3744] Fix select query failure issue when warehouse directory is default (not configured) in cluster

This is an automated email from the ASF dual-hosted git repository.

jackylk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 55af615  [CARBONDATA-3744] Fix select query failure issue when warehouse directory is default (not configured) in cluster
55af615 is described below

commit 55af6153a1296c2813ed8534faf51a9f9a6ab7d3
Author: ajantha-bhat <aj...@gmail.com>
AuthorDate: Fri Mar 20 11:14:04 2020 +0530

    [CARBONDATA-3744] Fix select query failure issue when warehouse directory is default (not configured) in cluster
    
    Why is this PR needed?
    
    A select query fails when the warehouse directory is left at its default (not configured), with the call stack below.
    
    0: jdbc:hive2://localhost:10000> create table ab(age int) stored as carbondata;
    +---------+
    | Result  |
    +---------+
    +---------+
    No rows selected (0.093 seconds)
    0: jdbc:hive2://localhost:10000> select count(*) from ab;
    Error: org.apache.spark.sql.catalyst.analysis.NoSuchTableException: Table or view 'ab' not found in database 'tpch'; (state=,code=0)
    
    caused by
    java.io.FileNotFoundException: File hdfs://localhost:54311/home/root1/tools/spark-2.3.4-bin-hadoop2.7/spark-warehouse/tpch.db/ab/Metadata does not exist.
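    
    For illustration, a minimal Scala sketch of the mechanism, assuming a
    cluster where fs.defaultFS points to the HDFS instance from the stack
    above: a table path stored without a scheme is resolved against the
    default file system when it is read back, so a local warehouse path
    turns into a non-existent HDFS location.
    
        import org.apache.hadoop.conf.Configuration
        import org.apache.hadoop.fs.Path
    
        val conf = new Configuration()
        // Assumed cluster setting, matching the stack above
        conf.set("fs.defaultFS", "hdfs://localhost:54311")
    
        // Scheme was stripped when the table path was stored
        val stored = new Path(
          "/home/root1/tools/spark-2.3.4-bin-hadoop2.7/spark-warehouse/tpch.db/ab")
        // No scheme, so the path is resolved against fs.defaultFS (HDFS)
        println(stored.getFileSystem(conf).getUri)  // hdfs://localhost:54311
    
        // Keeping the scheme pins the path to the local file system
        val kept = new Path(
          "file:/home/root1/tools/spark-2.3.4-bin-hadoop2.7/spark-warehouse/tpch.db/ab")
        println(kept.getFileSystem(conf).getUri)    // file:///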
    
    What changes were proposed in this PR?
    
    When spark.sql.warehouse.dir is not configured, the default warehouse directory under SPARK_HOME on the local file system is used. But in a cluster, describe table shows the table path with an HDFS prefix.
    
    The reason is that we were removing the local file system scheme from the table path; when the path is read back in a cluster, the HDFS prefix gets added to it. If we keep the scheme instead, the issue does not occur.
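    
    A hedged sketch of the difference: stripScheme below is a hypothetical
    stand-in for a normalization step that drops the scheme (such as the
    FileFactory.getUpdatedFilePath calls removed in the diff below). The
    point is only that stripping the file: scheme loses the information
    needed to resolve the path correctly later.
    
        import org.apache.hadoop.fs.Path
    
        // Hypothetical stand-in: keep only the path part, dropping the scheme
        def stripScheme(location: String): String =
          new Path(location).toUri.getPath
    
        val dbLocation =
          "file:/home/root1/tools/spark-2.3.4-bin-hadoop2.7/spark-warehouse/tpch.db"
    
        // Before the fix: the scheme is removed here, so a later read in a
        // cluster resolves the bare path against HDFS
        val beforeFix = stripScheme(dbLocation)  // "/home/root1/.../tpch.db"
    
        // After the fix: the location is kept as-is, scheme included, so it
        // still resolves to the local file system
        val afterFix = dbLocation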
    
    Does this PR introduce any user interface change?
    No
    
    Is any new testcase added?
    No. The issue occurs only in a cluster with HDFS or OBS.
    
    This closes #3675
---
 .../carbondata/core/statusmanager/SegmentStatusManager.java       | 2 +-
 .../spark/src/main/scala/org/apache/spark/sql/CarbonEnv.scala     | 8 +++-----
 .../org/apache/carbondata/mv/rewrite/TestAllOperationsOnMV.scala  | 5 +++--
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
index c2d6db0..0c1b5f7 100755
--- a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
+++ b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
@@ -601,7 +601,7 @@ public class SegmentStatusManager {
     }
     // If process crashed during following write, table status file need to be
     // manually recovered.
-    writeStringIntoFile(tableStatusPath, content);
+    writeStringIntoFile(FileFactory.getUpdatedFilePath(tableStatusPath), content);
   }
 
   // a dummy func for mocking in testcase, which simulates IOException
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonEnv.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonEnv.scala
index 849fa1f..fd461bd 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonEnv.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonEnv.scala
@@ -319,9 +319,8 @@ object CarbonEnv {
     // check whether the carbon store and hive store is same or different.
     if ((!EnvHelper.isLegacy(sparkSession)) &&
         (dbName.equals("default") || databaseLocation.endsWith(".db"))) {
-      val carbonStorePath = FileFactory.getUpdatedFilePath(CarbonProperties.getStorePath())
-      val hiveStorePath = FileFactory.getUpdatedFilePath(
-        sparkSession.conf.get("spark.sql.warehouse.dir", carbonStorePath))
+      val carbonStorePath = CarbonProperties.getStorePath()
+      val hiveStorePath = sparkSession.conf.get("spark.sql.warehouse.dir", carbonStorePath)
       // if carbon.store does not point to spark.sql.warehouse.dir then follow the old table path
       // format
       if (carbonStorePath != null && !hiveStorePath.equals(carbonStorePath)) {
@@ -330,8 +329,7 @@ object CarbonEnv {
                            dbName
       }
     }
-
-    FileFactory.getUpdatedFilePath(databaseLocation)
+    databaseLocation
   }
 
   /**
diff --git a/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/TestAllOperationsOnMV.scala b/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/TestAllOperationsOnMV.scala
index 280ae74..8b7a082 100644
--- a/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/TestAllOperationsOnMV.scala
+++ b/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/TestAllOperationsOnMV.scala
@@ -28,6 +28,7 @@ import org.scalatest.BeforeAndAfterEach
 import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
 import org.apache.carbondata.core.cache.CacheProvider
 import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.datastore.impl.FileFactory
 import org.apache.carbondata.core.metadata.CarbonMetadata
 import org.apache.carbondata.spark.exception.ProcessMetaDataException
 
@@ -646,8 +647,8 @@ class TestAllOperationsOnMV extends QueryTest with BeforeAndAfterEach {
     val dbPath = CarbonEnv
       .getDatabaseLocation(tableIdentifier.database.get, sqlContext.sparkSession)
     val tablePath = carbonTable.getTablePath
-    val mvPath = dbPath + CarbonCommonConstants.FILE_SEPARATOR + "dm" +
-                 CarbonCommonConstants.FILE_SEPARATOR
+    val mvPath = FileFactory.getUpdatedFilePath(dbPath) + CarbonCommonConstants.FILE_SEPARATOR +
+                 "dm" + CarbonCommonConstants.FILE_SEPARATOR
 
     // Check if table index entries are dropped
     assert(droppedCacheKeys.asScala.exists(key => key.startsWith(tablePath)))