You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2019/01/30 10:39:26 UTC

[carbondata] 22/27: [CARBONDATA-3273] [CARBONDATA-3274] Fix for SORT_SCOPE in CarbonLoadDataCommand

This is an automated email from the ASF dual-hosted git repository.

ravipesala pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/carbondata.git

commit 9373bc47e1a966405d3ecf611df526c20804b17f
Author: namanrastogi <na...@gmail.com>
AuthorDate: Fri Jan 25 15:19:58 2019 +0530

    [CARBONDATA-3273] [CARBONDATA-3274] Fix for SORT_SCOPE in CarbonLoadDataCommand
    
    Problem1: With no SORT_COLUMNS, loading data was taking SORT_SCOPE=LOCAL_SORT instead of NO_SORT.
    Solution: Added a check for SORT_COLUMNS in CarbonLoadDataCommand
    
    Problem2: On a table with some SORT_COLUMNS and no SORT_SCOPE specified, data loading did not consider the CARBON.OPTIONS.SORT.SCOPE session property when choosing the sort scope.
    Solution: Added checking of CARBON.OPTIONS.SORT.SCOPE while loading.
    
    This closes #3103
---
 docs/configuration-parameters.md                   |  2 +-
 docs/dml-of-carbondata.md                          | 18 ++++++++++----
 .../command/management/CarbonLoadDataCommand.scala | 28 +++++++++++-----------
 3 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md
index d28ad61..9f13e97 100644
--- a/docs/configuration-parameters.md
+++ b/docs/configuration-parameters.md
@@ -208,7 +208,7 @@ RESET
 | carbon.options.date.format                 | Specifies the data format of the date columns in the data being loaded |
 | carbon.options.timestamp.format            | Specifies the timestamp format of the time stamp columns in the data being loaded |
 | carbon.options.sort.scope                 | Specifies how the current data load should be sorted with. **NOTE:** Refer to [Data Loading Configuration](#data-loading-configuration)#carbon.sort.scope for detailed information. |
-| carbon.table.load.sort.scope              | Overrides the SORT_SCOPE provided in CREATE TABLE.           |
+| `carbon.table.load.sort.scope.<db>.<table>` | Overrides the SORT_SCOPE provided in CREATE TABLE.           |
 | carbon.options.global.sort.partitions     |                                                              |
 | carbon.options.serialization.null.format  | Default Null value representation in the data being loaded. **NOTE:** Refer to [Data Loading Configuration](#data-loading-configuration)#carbon.options.serialization.null.format for detailed information. |
 | carbon.query.directQueryOnDataMap.enabled | Specifies whether datamap can be queried directly. This is useful for debugging purposes.**NOTE: **Refer to [Query Configuration](#query-configuration) for detailed information. |
diff --git a/docs/dml-of-carbondata.md b/docs/dml-of-carbondata.md
index b3fe517..ec2c053 100644
--- a/docs/dml-of-carbondata.md
+++ b/docs/dml-of-carbondata.md
@@ -109,11 +109,19 @@ CarbonData DML statements are documented here,which includes:
     ```
 
   - ##### SORT_SCOPE:
-  Sort Scope to be used for the current load. This overrides the Sort Scope of Table.
-
-  ```
-  OPTIONS('SORT_SCOPE'='BATCH_SORT')
-  ```
+    Sort Scope to be used for the current load. This overrides the Sort Scope of Table.
+    Requirement: Sort Columns must be set when creating the table. If Sort Columns is not set, Sort Scope is always NO_SORT.
+  
+    ```
+    OPTIONS('SORT_SCOPE'='BATCH_SORT')
+    ```
+    
+    Priority order for choosing Sort Scope is:
+    1. Load Data Command
+    2. `CARBON.TABLE.LOAD.SORT.SCOPE.<db>.<table>` session property
+    3. Table level Sort Scope
+    4. CARBON.OPTIONS.SORT.SCOPE session property
+    5. Default Value: NO_SORT
 
   - ##### MULTILINE:
 
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
index 7e3ea90..307e62d 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
@@ -200,26 +200,26 @@ case class CarbonLoadDataCommand(
     *     LOAD DATA INPATH 'data.csv' INTO TABLE tableName OPTIONS('sort_scope'='no_sort')
     *
     * 2. Session property CARBON_TABLE_LOAD_SORT_SCOPE  ->
-    *     SET CARBON.TABLE.LOAD.SORT.SCOPE.database.table=no_sort
-    *     SET CARBON.TABLE.LOAD.SORT.SCOPE.database.table=batch_sort
     *     SET CARBON.TABLE.LOAD.SORT.SCOPE.database.table=local_sort
-    *     SET CARBON.TABLE.LOAD.SORT.SCOPE.database.table=global_sort
     *
     * 3. Sort Scope provided in TBLPROPERTIES
     * 4. Session property CARBON_OPTIONS_SORT_SCOPE
     * 5. Default Sort Scope LOAD_SORT_SCOPE
     */
-    if (tableProperties.get(CarbonCommonConstants.SORT_COLUMNS) != null &&
-        tableProperties.get(CarbonCommonConstants.SORT_SCOPE) == null) {
-      // If there are Sort Columns given for the table and Sort Scope is not specified,
-      // we will take it as whichever sort scope given or LOCAL_SORT as default
-      optionsFinal
-        .put(CarbonCommonConstants.SORT_SCOPE,
-          carbonProperty
-            .getProperty(
-              CarbonLoadOptionConstants.CARBON_TABLE_LOAD_SORT_SCOPE + table.getDatabaseName + "." +
-              table.getTableName, carbonProperty.getProperty(CarbonCommonConstants.LOAD_SORT_SCOPE,
-                SortScopeOptions.getSortScope("LOCAL_SORT").toString)))
+    if (StringUtils.isBlank(tableProperties.get(CarbonCommonConstants.SORT_COLUMNS))) {
+      // If tableProperties.SORT_COLUMNS is null
+      optionsFinal.put(CarbonCommonConstants.SORT_SCOPE,
+        SortScopeOptions.SortScope.NO_SORT.name)
+    } else if (StringUtils.isBlank(tableProperties.get(CarbonCommonConstants.SORT_SCOPE))) {
+      // If tableProperties.SORT_COLUMNS is not null
+      // and tableProperties.SORT_SCOPE is null
+      optionsFinal.put(CarbonCommonConstants.SORT_SCOPE,
+        options.getOrElse(CarbonCommonConstants.SORT_SCOPE,
+          carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_TABLE_LOAD_SORT_SCOPE +
+            table.getDatabaseName + "." + table.getTableName,
+            carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_SORT_SCOPE,
+              carbonProperty.getProperty(CarbonCommonConstants.LOAD_SORT_SCOPE,
+                SortScopeOptions.SortScope.LOCAL_SORT.name)))))
     } else {
       optionsFinal.put(CarbonCommonConstants.SORT_SCOPE,
         options.getOrElse(CarbonCommonConstants.SORT_SCOPE,