Posted to commits@carbondata.apache.org by in...@apache.org on 2021/07/30 15:36:34 UTC

[carbondata] branch master updated: [CARBONDATA-4255] Prohibit Create/Drop Database when database location is inconsistent

This is an automated email from the ASF dual-hosted git repository.

indhumuthumurugesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 3c81c7a  [CARBONDATA-4255] Prohibit Create/Drop Database when database location is inconsistent
3c81c7a is described below

commit 3c81c7a7d475fe41262876da010175d66dd60235
Author: marchpure <ma...@126.com>
AuthorDate: Tue Jul 27 12:17:40 2021 +0800

    [CARBONDATA-4255] Prohibit Create/Drop Database when database location is inconsistent
    
    Why is this PR needed?
    When carbon.storelocation and spark.sql.warehouse.dir are configured to
    different values, the database location may be inconsistent. When the
    DROP DATABASE command is executed, both locations (the carbon dblocation
    and the hive dblocation) may be cleared, which may confuse users.
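
    For example (hypothetical paths), with the following configuration the
    two candidate locations for a database diverge:

        # carbon.properties
        carbon.storelocation=hdfs://ns/carbonstore
        # spark-defaults.conf
        spark.sql.warehouse.dir=hdfs://ns/spark-warehouse

    A database db1 may then resolve to hdfs://ns/carbonstore/db1 on the
    Carbon side but hdfs://ns/spark-warehouse/db1.db on the Hive side, and
    a single DROP DATABASE could remove both directories.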
    
    What changes were proposed in this PR?
    CREATE DATABASE and DROP DATABASE are prohibited when the database location is inconsistent.
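
    A minimal sketch of the resulting behavior (the SparkSession value
    'spark' and the paths are illustrative; the exception type and message
    come from this patch):

        // carbon.storelocation and spark.sql.warehouse.dir point to
        // different paths, and no explicit LOCATION clause is given:
        spark.sql("CREATE DATABASE db1")
        // => InvalidOperationException: Create database is prohibited when
        //    database location is inconsistent, ...

        // An explicit LOCATION bypasses the check, because the database
        // location is then unambiguous:
        spark.sql("CREATE DATABASE db1 LOCATION 'hdfs://ns/custom/db1'")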
    
    Does this PR introduce any user interface change?
    No
    
    Is any new testcase added?
    Yes
    
    This closes #4186
---
 conf/carbon.properties.template                    |  3 --
 conf/dataload.properties.template                  |  4 --
 .../core/constants/CarbonCommonConstants.java      |  1 +
 docs/ddl-of-carbondata.md                          |  7 +++-
 docs/file-structure-of-carbondata.md               |  2 +-
 docs/s3-guide.md                                   |  6 +--
 .../spark/sql/execution/strategy/DDLHelper.scala   | 25 +++++++-----
 .../spark/sql/execution/strategy/DDLStrategy.scala |  3 +-
 .../execution/command/CarbonHiveCommands.scala     | 20 +++++++--
 .../deleteTable/TestDeleteTableNewDDL.scala        | 47 ++++++++++++++++++++++
 10 files changed, 90 insertions(+), 28 deletions(-)

diff --git a/conf/carbon.properties.template b/conf/carbon.properties.template
index eb635d6..d3163b0 100644
--- a/conf/carbon.properties.template
+++ b/conf/carbon.properties.template
@@ -17,9 +17,6 @@
 #
 
 #################### System Configuration ##################
-##Optional. Location where CarbonData will create the store, and write the data in its own format.
-##If not specified then it takes spark.sql.warehouse.dir path.
-#carbon.storelocation
 #Base directory for Data files
 #carbon.ddl.base.hdfs.url
 #Path where the bad records are stored
diff --git a/conf/dataload.properties.template b/conf/dataload.properties.template
index 6145d33..37ada68 100644
--- a/conf/dataload.properties.template
+++ b/conf/dataload.properties.template
@@ -16,10 +16,6 @@
 # limitations under the License.
 #
 
-#carbon store path
-# you should change to the code path of your local machine
-carbon.storelocation=/home/david/Documents/carbondata/examples/spark/target/store
-
 #csv delimiter character
 delimiter=,
 
diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index 1a8131f..7b2fddf 100644
--- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -37,6 +37,7 @@ public final class CarbonCommonConstants {
   /**
    * location of the carbon member, hierarchy and fact files
    */
+  @Deprecated
   @CarbonProperty
   public static final String STORE_LOCATION = "carbon.storelocation";
 
diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md
index 996f7c8..b05a54a 100644
--- a/docs/ddl-of-carbondata.md
+++ b/docs/ddl-of-carbondata.md
@@ -638,7 +638,12 @@ CarbonData DDL statements are documented here, which includes:
 
 
 ## CREATE DATABASE 
-  This function creates a new database. By default the database is created in Carbon store location, but you can also specify custom location.
+  This function creates a new database. By default the database is created in the location specified by 'spark.sql.warehouse.dir', but you can also specify a custom location. The configuration 'carbon.storelocation' has been deprecated.
+
+  **Note:**
+    For simplicity, we recommend removing the carbon.storelocation configuration. If carbon.storelocation and spark.sql.warehouse.dir are configured to different paths, an exception is thrown on CREATE DATABASE and DROP DATABASE to avoid an inconsistent database location.
+
+
   ```
   CREATE DATABASE [IF NOT EXISTS] database_name [LOCATION path];
   ```
diff --git a/docs/file-structure-of-carbondata.md b/docs/file-structure-of-carbondata.md
index 6ffc331..f82ccf3 100644
--- a/docs/file-structure-of-carbondata.md
+++ b/docs/file-structure-of-carbondata.md
@@ -42,7 +42,7 @@ This document describes what a CarbonData table looks like in an HDFS directo
 
 ## File Directory Structure
 
-The CarbonData files are stored in the location specified by the ***carbon.storelocation*** configuration (configured in carbon.properties; if not configured, the default is ../carbon.store).
+The CarbonData files are stored in the location specified by the ***spark.sql.warehouse.dir*** configuration (configured in spark-defaults.conf; if not configured, Spark defaults to a spark-warehouse directory under the current working directory).
 
   The file directory structure is as below: 
 
diff --git a/docs/s3-guide.md b/docs/s3-guide.md
index 94aebae..8e4a963 100644
--- a/docs/s3-guide.md
+++ b/docs/s3-guide.md
@@ -28,12 +28,12 @@ Carbondata relies on Hadoop provided S3 filesystem APIs to access Object stores.
 
 # Writing to Object Storage
 
-To store carbondata files onto Object Store, `carbon.storelocation` property will have 
-to be configured with Object Store path in CarbonProperties file. 
+To store carbondata files onto Object Store, the `spark.sql.warehouse.dir` property has 
+to be configured with an Object Store path in spark-defaults.conf. 
 
 For example:
 ```
-carbon.storelocation=s3a://mybucket/carbonstore
+spark.sql.warehouse.dir=s3a://mybucket/carbonstore
 ```
 
 If the existing store location cannot be changed or only specific tables need to be stored 
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLHelper.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLHelper.scala
index b221769..fa9615d 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLHelper.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLHelper.scala
@@ -17,9 +17,11 @@
 
 package org.apache.spark.sql.execution.strategy
 
+import org.apache.commons.lang.StringUtils
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.{CarbonParserUtil, TableIdentifier}
-import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, UnresolvedRelation}
+import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
 import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils}
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.SparkPlan
@@ -33,7 +35,7 @@ import org.apache.spark.sql.hive.execution.CreateHiveTableAsSelectCommand
 import org.apache.spark.sql.parser.{CarbonSpark2SqlParser, CarbonSparkSqlParserUtil}
 import org.apache.spark.sql.types.DecimalType
 import org.apache.spark.sql.util.SparkSQLUtil
-import org.apache.spark.util.{CarbonReflectionUtils, FileUtils}
+import org.apache.spark.util.CarbonReflectionUtils
 
 import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
 import org.apache.carbondata.common.logging.LogServiceFactory
@@ -51,16 +53,17 @@ object DDLHelper {
       sparkSession: SparkSession): CreateDatabaseCommand = {
     ThreadLocalSessionInfo
       .setConfigurationToCurrentThread(sparkSession.sessionState.newHadoopConf())
-    if (!EnvHelper.isLegacy(sparkSession)) {
-      val databaseName = createDatabaseCommand.databaseName
-      val dbLocation = try {
-        CarbonEnv.getDatabaseLocation(databaseName, sparkSession)
-      } catch {
-        case _: NoSuchDatabaseException =>
-          CarbonProperties.getStorePath
+    if (!EnvHelper.isLegacy(sparkSession) && createDatabaseCommand.path.isEmpty) {
+      val carbonStorePath = CarbonProperties.getStorePath()
+      val sparkWarehouse = sparkSession.conf.get("spark.sql.warehouse.dir")
+      if (StringUtils.isNotEmpty(carbonStorePath) && StringUtils.isNotEmpty(sparkWarehouse)
+        && !carbonStorePath.equals(sparkWarehouse)) {
+        throw new InvalidOperationException("Create database is prohibited when" +
+          " database location is inconsistent, please don't configure" +
+          " carbon.storelocation and spark.sql.warehouse.dir to different values," +
+          s" carbon.storelocation is $carbonStorePath," +
+          s" while spark.sql.warehouse.dir is $sparkWarehouse")
       }
-      FileUtils
-        .createDatabaseDirectory(databaseName, dbLocation, sparkSession.sparkContext)
     }
     createDatabaseCommand
   }
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLStrategy.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLStrategy.scala
index e096295..f4f8c79 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLStrategy.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/strategy/DDLStrategy.scala
@@ -183,8 +183,7 @@ object DDLStrategy extends SparkStrategy {
       // database
       case createDb: CreateDatabaseCommand =>
         ExecutedCommandExec(DDLHelper.createDatabase(createDb, sparkSession)) :: Nil
-      case drop@DropDatabaseCommand(dbName, ifExists, _)
-        if CarbonEnv.databaseLocationExists(dbName, sparkSession, ifExists) =>
+      case drop@DropDatabaseCommand(dbName, ifExists, _) =>
         ExecutedCommandExec(CarbonDropDatabaseCommand(drop)) :: Nil
       // explain
       case explain: ExplainCommand =>
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonHiveCommands.scala b/integration/spark/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonHiveCommands.scala
index e819b86..a3e3b41 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonHiveCommands.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonHiveCommands.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql.hive.execution.command
 
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException
 import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchTableException}
@@ -52,9 +53,10 @@ case class CarbonDropDatabaseCommand(command: DropDatabaseCommand)
           true
       })
     }
-    var databaseLocation = ""
+
+    var carbonDatabaseLocation = ""
     try {
-      databaseLocation = CarbonEnv.getDatabaseLocation(dbName, sparkSession)
+      carbonDatabaseLocation = CarbonEnv.getDatabaseLocation(dbName, sparkSession)
     } catch {
       case e: NoSuchDatabaseException =>
         // if database not found and ifExists true return empty
@@ -62,6 +64,18 @@ case class CarbonDropDatabaseCommand(command: DropDatabaseCommand)
           return rows
         }
     }
+
+    val hiveDatabaseLocation =
+      sparkSession.sessionState.catalog.getDatabaseMetadata(dbName).locationUri.toString
+
+    if (!carbonDatabaseLocation.equals(hiveDatabaseLocation)) {
+      throw new InvalidOperationException("Drop database is prohibited when" +
+        " database location is inconsistent, please don't configure" +
+        " carbon.storelocation and spark.sql.warehouse.dir to different values," +
+        s" the carbon database location is $carbonDatabaseLocation," +
+        s" while the hive database location is $hiveDatabaseLocation")
+    }
+
     // DropHiveDB command will fail if cascade is false and one or more table exists in database
     if (command.cascade && tablesInDB != null) {
       tablesInDB.foreach { tableName =>
@@ -69,7 +83,7 @@ case class CarbonDropDatabaseCommand(command: DropDatabaseCommand)
       }
     }
     rows = command.run(sparkSession)
-    CarbonUtil.dropDatabaseDirectory(databaseLocation)
+    CarbonUtil.dropDatabaseDirectory(carbonDatabaseLocation)
     rows
   }
 }
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/deleteTable/TestDeleteTableNewDDL.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/deleteTable/TestDeleteTableNewDDL.scala
index 3b84bd3..59b4ea8 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/deleteTable/TestDeleteTableNewDDL.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/deleteTable/TestDeleteTableNewDDL.scala
@@ -16,9 +16,16 @@
  */
 package org.apache.carbondata.spark.testsuite.deleteTable
 
+
+import java.io.File
+
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException
 import org.apache.spark.sql.test.util.QueryTest
 import org.scalatest.BeforeAndAfterAll
 
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
 /**
  * test class for testing the create cube DDL.
  */
@@ -58,6 +65,46 @@ class TestDeleteTableNewDDL extends QueryTest with BeforeAndAfterAll {
     }.getMessage.contains("Database 'dropdb_test' not found"))
   }
 
+  test("test create database when dblocation is inconsistent") {
+    val dbName = "dropdb_test"
+    sql(s"drop database if exists $dbName cascade")
+    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION,
+      warehouse + File.separator + "carbonwarehouse")
+    val exception = intercept[InvalidOperationException] {
+      sql(s"create database $dbName")
+    }
+    assert(exception.getMessage.contains("Create database is prohibited when" +
+      " database location is inconsistent, please don't configure" +
+      " carbon.storelocation and spark.sql.warehouse.dir to different values"))
+    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION, warehouse)
+  }
+
+  test("test create database with location under" +
+    " different configuration of carbonstorelocation and spark.sql.warehouse.dir") {
+    val dbName = "dropdb_test"
+    sql(s"drop database if exists $dbName cascade")
+    val dbLocation = warehouse + File.separator + dbName
+    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION,
+      warehouse + File.separator + "carbonwarehouse")
+    assert(sql(s"create database $dbName location '$dblocaiton'").collect().isEmpty)
+    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION, warehouse)
+  }
+
+  test("test drop database when dblocation is inconsistent") {
+    val dbName = "dropdb_test"
+    sql(s"drop database if exists $dbName cascade")
+    sql(s"create database $dbName")
+    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION,
+      warehouse + File.separator + "carbonwarehouse")
+    val exception = intercept[InvalidOperationException] {
+      sql(s"drop database if exists $dbName")
+    }
+    assert(exception.getMessage.contains("Drop database is prohibited when" +
+      " database location is inconsistent, please don't configure" +
+      " carbon.storelocation and spark.sql.warehouse.dir to different values"))
+    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION, warehouse)
+  }
+
   test("test drop database cascade command") {
     sql("drop database if exists testdb cascade")
     sql("create database testdb")