You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2019/01/25 13:21:04 UTC

[carbondata] branch master updated: [CARBONDATA-3264] Added SORT_SCOPE in ALTER TABLE SET

This is an automated email from the ASF dual-hosted git repository.

kumarvishal09 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 8e39ee1  [CARBONDATA-3264] Added SORT_SCOPE in ALTER TABLE SET
8e39ee1 is described below

commit 8e39ee113236b7c48b8a0a46777cafc771701d9f
Author: namanrastogi <na...@gmail.com>
AuthorDate: Tue Jan 22 11:42:40 2019 +0530

    [CARBONDATA-3264] Added SORT_SCOPE in ALTER TABLE SET
    
    Added SORT_SCOPE in ALTER TABLE SET Command.
    This command changes the SORT_SCOPE of a table after the table has been created.
    
    Usage:
    
    ALTER TABLE <table> SET TBLPROPERTIES('sort_scope'='no_sort')
    Restrictions:
    
    Cannot change SORT_SCOPE from NO_SORT to anything else when SORT_COLUMNS is empty.
    
    This closes #3094
---
 docs/ddl-of-carbondata.md                          | 58 +++++++++++-------
 .../org/apache/spark/util/AlterTableUtil.scala     | 33 +++++++++--
 .../restructure/AlterTableValidationTestCase.scala | 69 ++++++++++++++++++++++
 3 files changed, 134 insertions(+), 26 deletions(-)

diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md
index 4f9e47b..0d0e5bd 100644
--- a/docs/ddl-of-carbondata.md
+++ b/docs/ddl-of-carbondata.md
@@ -51,7 +51,7 @@ CarbonData DDL statements are documented here,which includes:
     * [RENAME COLUMN](#change-column-nametype)
     * [CHANGE COLUMN NAME/TYPE](#change-column-nametype)
     * [MERGE INDEXES](#merge-index)
-    * [SET/UNSET Local Dictionary Properties](#set-and-unset-for-local-dictionary-properties)
+    * [SET/UNSET](#set-and-unset)
   * [DROP TABLE](#drop-table)
   * [REFRESH TABLE](#refresh-table)
   * [COMMENTS](#table-and-column-comment)
@@ -634,7 +634,7 @@ CarbonData DDL statements are documented here,which includes:
 
   The following section introduce the commands to modify the physical or logical state of the existing table(s).
 
-   - ##### RENAME TABLE
+   - #### RENAME TABLE
    
      This command is used to rename the existing table.
      ```
@@ -648,7 +648,7 @@ CarbonData DDL statements are documented here,which includes:
      ALTER TABLE test_db.carbon RENAME TO test_db.carbonTable
      ```
 
-   - ##### ADD COLUMNS
+   - #### ADD COLUMNS
    
      This command is used to add a new column to the existing table.
      ```
@@ -676,7 +676,7 @@ Users can specify which columns to include and exclude for local dictionary gene
      ALTER TABLE carbon ADD COLUMNS (a1 STRING, b1 STRING) TBLPROPERTIES('LOCAL_DICTIONARY_INCLUDE'='a1','LOCAL_DICTIONARY_EXCLUDE'='b1')
      ```
 
-   - ##### DROP COLUMNS
+   - #### DROP COLUMNS
    
      This command is used to delete the existing column(s) in a table.
 
@@ -696,7 +696,7 @@ Users can specify which columns to include and exclude for local dictionary gene
 
      **NOTE:** Drop Complex child column is not supported.
 
-   - ##### CHANGE COLUMN NAME/TYPE
+   - #### CHANGE COLUMN NAME/TYPE
    
      This command is used to change column name and the data type from INT to BIGINT or decimal precision from lower to higher.
      Change of decimal data type from lower precision to higher precision will only be supported for cases where there is no data loss.
@@ -729,7 +729,8 @@ Users can specify which columns to include and exclude for local dictionary gene
      ```
 
      **NOTE:** Once the column is renamed, user has to take care about replacing the fileheader with the new name or changing the column header in csv file.
-- ##### MERGE INDEX
+   
+   - #### MERGE INDEX
 
      This command is used to merge all the CarbonData index files (.carbonindex) inside a segment to a single CarbonData index merge file (.carbonindexmerge). This enhances the first query performance.
 
@@ -747,23 +748,36 @@ Users can specify which columns to include and exclude for local dictionary gene
 
      * Merge index is not supported on streaming table.
 
-- ##### SET and UNSET for Local Dictionary Properties
-
-   When set command is used, all the newly set properties will override the corresponding old properties if exists.
-  
-   Example to SET Local Dictionary Properties:
-   ```
-   ALTER TABLE tablename SET TBLPROPERTIES('LOCAL_DICTIONARY_ENABLE'='false','LOCAL_DICTIONARY_THRESHOLD'='1000','LOCAL_DICTIONARY_INCLUDE'='column1','LOCAL_DICTIONARY_EXCLUDE'='column2')
-   ```
-   When Local Dictionary properties are unset, corresponding default values will be used for these properties.
+   - #### SET and UNSET
    
-   Example to UNSET Local Dictionary Properties:
-   ```
-   ALTER TABLE tablename UNSET TBLPROPERTIES('LOCAL_DICTIONARY_ENABLE','LOCAL_DICTIONARY_THRESHOLD','LOCAL_DICTIONARY_INCLUDE','LOCAL_DICTIONARY_EXCLUDE')
-   ```
-   
-   **NOTE:** For old tables, by default, local dictionary is disabled. If user wants local dictionary for these tables, user can enable/disable local dictionary for new data at their discretion. 
-   This can be achieved by using the alter table set command.
+     When the SET command is used, the newly set properties override the corresponding existing properties, if any.
+  
+     - ##### Local Dictionary Properties
+       Example to SET Local Dictionary Properties:
+       ```
+       ALTER TABLE tablename SET TBLPROPERTIES('LOCAL_DICTIONARY_ENABLE'='false','LOCAL_DICTIONARY_THRESHOLD'='1000','LOCAL_DICTIONARY_INCLUDE'='column1','LOCAL_DICTIONARY_EXCLUDE'='column2')
+       ```
+       When Local Dictionary properties are unset, the corresponding default values will be used for these properties.
+    
+       Example to UNSET Local Dictionary Properties:
+       ```
+       ALTER TABLE tablename UNSET TBLPROPERTIES('LOCAL_DICTIONARY_ENABLE','LOCAL_DICTIONARY_THRESHOLD','LOCAL_DICTIONARY_INCLUDE','LOCAL_DICTIONARY_EXCLUDE')
+       ```
+    
+       **NOTE:** For old tables, local dictionary is disabled by default. If the user wants local dictionary for these tables, it can be enabled or disabled for new data at the user's discretion.
+       This can be achieved by using the ALTER TABLE SET command.
+  
+     - ##### SORT SCOPE
+       Example to SET SORT SCOPE:
+       ```
+       ALTER TABLE tablename SET TBLPROPERTIES('SORT_SCOPE'='NO_SORT')
+       ```
+       When SORT_SCOPE is unset, the default value (NO_SORT) will be used.
+    
+       Example to UNSET SORT SCOPE:
+       ```
+       ALTER TABLE tablename UNSET TBLPROPERTIES('SORT_SCOPE')
+       ```
 
 ### DROP TABLE
 
diff --git a/integration/spark2/src/main/scala/org/apache/spark/util/AlterTableUtil.scala b/integration/spark2/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
index 045d2d5..1dc562dc 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
@@ -35,6 +35,7 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.datamap.DataMapStoreManager
 import org.apache.carbondata.core.datastore.block.SegmentPropertiesAndSchemaHolder
 import org.apache.carbondata.core.datastore.impl.FileFactory
+import org.apache.carbondata.core.exception.InvalidConfigurationException
 import org.apache.carbondata.core.locks.{CarbonLockUtil, ICarbonLock, LockUsage}
 import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier}
 import org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl
@@ -360,6 +361,9 @@ object AlterTableUtil {
       // validate the range column properties
       validateRangeColumnProperties(carbonTable, lowerCasePropertiesMap)
 
+      // validate the Sort Scope
+      validateSortScopeProperty(carbonTable, lowerCasePropertiesMap)
+
       // below map will be used for cache invalidation. As tblProperties map is getting modified
       // in the next few steps the original map need to be retained for any decision making
       val existingTablePropertiesMap = mutable.Map(tblPropertiesMap.toSeq: _*)
@@ -387,11 +391,14 @@ object AlterTableUtil {
             // older tables. So no need to remove from table properties map for unset just to ensure
             // for older table behavior. So in case of unset, if enable property is already present
             // in map, then just set it to default value of local dictionary which is true.
-            if (!propKey.equalsIgnoreCase(CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE)) {
-              tblPropertiesMap.remove(propKey.toLowerCase)
-            } else {
+            if (propKey.equalsIgnoreCase(CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE)) {
               tblPropertiesMap
                 .put(propKey.toLowerCase, CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE_DEFAULT)
+            } else if (propKey.equalsIgnoreCase("sort_scope")) {
+              tblPropertiesMap
+                .put(propKey.toLowerCase, CarbonCommonConstants.LOAD_SORT_SCOPE_DEFAULT)
+            } else {
+              tblPropertiesMap.remove(propKey.toLowerCase)
             }
           } else {
             val errorMessage = "Error: Invalid option(s): " + propKey
@@ -432,7 +439,8 @@ object AlterTableUtil {
       "LOCAL_DICTIONARY_INCLUDE",
       "LOCAL_DICTIONARY_EXCLUDE",
       "LOAD_MIN_SIZE_INMB",
-      "RANGE_COLUMN")
+      "RANGE_COLUMN",
+      "SORT_SCOPE")
     supportedOptions.contains(propKey.toUpperCase)
   }
 
@@ -534,6 +542,23 @@ object AlterTableUtil {
     }
   }
 
+  def validateSortScopeProperty(carbonTable: CarbonTable,
+      propertiesMap: mutable.Map[String, String]): Unit = {
+    propertiesMap.foreach { property =>
+      if (property._1.equalsIgnoreCase("SORT_SCOPE")) {
+        if (!CarbonUtil.isValidSortOption(property._2)) {
+          throw new MalformedCarbonCommandException(
+            s"Invalid SORT_SCOPE ${ property._2 }, valid SORT_SCOPE are 'NO_SORT', 'BATCH_SORT', " +
+            s"'LOCAL_SORT' and 'GLOBAL_SORT'")
+        } else if (!property._2.equalsIgnoreCase("NO_SORT") &&
+                   (carbonTable.getNumberOfSortColumns == 0)) {
+          throw new InvalidConfigurationException(
+            s"Cannot set SORT_SCOPE as ${ property._2 } when table has no SORT_COLUMNS")
+        }
+      }
+    }
+  }
+
   /**
    * This method will validate if there is any complex type column in the columns to be cached
    *
diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/AlterTableValidationTestCase.scala b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/AlterTableValidationTestCase.scala
index b77fdc8..10afa87 100644
--- a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/AlterTableValidationTestCase.scala
+++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/AlterTableValidationTestCase.scala
@@ -775,6 +775,75 @@ test("test alter command for boolean data type with correct default measure valu
       .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, timestampFormat)
   }
 
+
+  test("Alter Table Change Sort Scope 1") {
+    sql("DROP TABLE IF EXISTS t1")
+    sql(s"CREATE TABLE t1(age int, name string) STORED BY 'carbondata' TBLPROPERTIES" +
+        s"('sort_columns'='age', 'sort_scope'='local_sort')")
+    sql("ALTER TABLE t1 SET TBLPROPERTIES('sort_scope'='batch_sort')")
+    assert(sortScopeInDescFormatted("t1").equalsIgnoreCase("BATCH_SORT"))
+    sql("DROP TABLE t1")
+  }
+
+  test("Alter Table Change Sort Scope 2") {
+    sql("DROP TABLE IF EXISTS t1")
+    sql(s"CREATE TABLE t1(age int, name string) STORED BY 'carbondata' TBLPROPERTIES" +
+        s"('sort_columns'='age', 'sort_scope'='local_sort')")
+    sql("ALTER TABLE t1 SET TBLPROPERTIES('sort_scope'='no_sort')")
+    assert(sortScopeInDescFormatted("t1").equalsIgnoreCase("NO_SORT"))
+    sql("DROP TABLE t1")
+  }
+
+  test("Alter Table Change Sort Scope 3") {
+    sql("DROP TABLE IF EXISTS t1")
+    sql(s"CREATE TABLE t1(age int, name string) STORED BY 'carbondata' TBLPROPERTIES" +
+        s"('sort_columns'='')")
+
+    // This throws an exception because SORT_COLUMNS is empty
+    intercept[RuntimeException] {
+      sql("ALTER TABLE t1 SET TBLPROPERTIES('sort_scope'='local_sort')")
+    }
+
+    // The rejected ALTER must leave the table unchanged: SORT_SCOPE
+    // stays NO_SORT, because a table whose SORT_COLUMNS is empty
+    // cannot be switched to any other sort scope.
+    assert(sortScopeInDescFormatted("t1").equalsIgnoreCase("NO_SORT"))
+    sql("DROP TABLE t1")
+  }
+
+  test("Alter Table Change Sort Scope 4") {
+    sql("DROP TABLE IF EXISTS t1")
+    sql(s"CREATE TABLE t1(age int, name string) STORED BY 'carbondata' TBLPROPERTIES" +
+        s"('sort_columns'='age', 'sort_scope'='local_sort')")
+    sql("ALTER TABLE t1 UNSET TBLPROPERTIES('sort_scope')")
+
+    // Unsetting the SORT_SCOPE should change the SORT_SCOPE to
+    // CarbonCommonConstants.LOAD_SORT_SCOPE_DEFAULT
+    assert(sortScopeInDescFormatted("t1")
+      .equalsIgnoreCase(CarbonCommonConstants.LOAD_SORT_SCOPE_DEFAULT))
+    sql("DROP TABLE t1")
+  }
+
+  test("Alter Table Change Sort Scope 5") {
+    sql("DROP TABLE IF EXISTS t1")
+    sql(s"CREATE TABLE t1(age int, name string) STORED BY 'carbondata' TBLPROPERTIES" +
+        s"('sort_scope'='local_sort', 'sort_columns'='age')")
+    intercept[RuntimeException] {
+      sql("ALTER TABLE t1 SET TBLPROPERTIES('sort_scope'='fake_sort')")
+    }
+
+    // SORT_SCOPE should remain unchanged
+    assert(sortScopeInDescFormatted("t1").equalsIgnoreCase("LOCAL_SORT"))
+    sql("DROP TABLE t1")
+  }
+
+  def sortScopeInDescFormatted(tableName: String): String = {
+    sql(s"DESCRIBE FORMATTED $tableName").filter(
+      (x: Row) => x.getString(0).equalsIgnoreCase("sort scope")
+    ).collectAsList().get(0).get(1).toString
+  }
+
+
   override def afterAll {
     sql("DROP TABLE IF EXISTS restructure")
     sql("drop table if exists table1")