You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2019/01/30 10:39:26 UTC
[carbondata] 22/27: [CARBONDATA-3273] [CARBONDATA-3274] Fix for
SORT_SCOPE in CarbonLoadDataCommand
This is an automated email from the ASF dual-hosted git repository.
ravipesala pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/carbondata.git
commit 9373bc47e1a966405d3ecf611df526c20804b17f
Author: namanrastogi <na...@gmail.com>
AuthorDate: Fri Jan 25 15:19:58 2019 +0530
[CARBONDATA-3273] [CARBONDATA-3274] Fix for SORT_SCOPE in CarbonLoadDataCommand
Problem1: With no SORT_COLUMNS, loading data was taking SORT_SCOPE=LOCAL_SORT instead of NO_SORT.
Solution: Added a check for SORT_COLUMNS in CarbonLoadDataCommand
Problem2: On a table with SORT_COLUMNS set but SORT_SCOPE not specified, data loading did not consider the CARBON.OPTIONS.SORT.SCOPE session property when choosing the SORT_SCOPE.
Solution: Added checking of CARBON.OPTIONS.SORT.SCOPE while loading.
This closes #3103
---
docs/configuration-parameters.md | 2 +-
docs/dml-of-carbondata.md | 18 ++++++++++----
.../command/management/CarbonLoadDataCommand.scala | 28 +++++++++++-----------
3 files changed, 28 insertions(+), 20 deletions(-)
diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md
index d28ad61..9f13e97 100644
--- a/docs/configuration-parameters.md
+++ b/docs/configuration-parameters.md
@@ -208,7 +208,7 @@ RESET
| carbon.options.date.format | Specifies the data format of the date columns in the data being loaded |
| carbon.options.timestamp.format | Specifies the timestamp format of the time stamp columns in the data being loaded |
| carbon.options.sort.scope | Specifies how the current data load should be sorted with. **NOTE:** Refer to [Data Loading Configuration](#data-loading-configuration)#carbon.sort.scope for detailed information. |
-| carbon.table.load.sort.scope | Overrides the SORT_SCOPE provided in CREATE TABLE. |
+| `carbon.table.load.sort.scope.<db>.<table>` | Overrides the SORT_SCOPE provided in CREATE TABLE. |
| carbon.options.global.sort.partitions | |
| carbon.options.serialization.null.format | Default Null value representation in the data being loaded. **NOTE:** Refer to [Data Loading Configuration](#data-loading-configuration)#carbon.options.serialization.null.format for detailed information. |
| carbon.query.directQueryOnDataMap.enabled | Specifies whether datamap can be queried directly. This is useful for debugging purposes.**NOTE: **Refer to [Query Configuration](#query-configuration) for detailed information. |
diff --git a/docs/dml-of-carbondata.md b/docs/dml-of-carbondata.md
index b3fe517..ec2c053 100644
--- a/docs/dml-of-carbondata.md
+++ b/docs/dml-of-carbondata.md
@@ -109,11 +109,19 @@ CarbonData DML statements are documented here,which includes:
```
- ##### SORT_SCOPE:
- Sort Scope to be used for the current load. This overrides the Sort Scope of Table.
-
- ```
- OPTIONS('SORT_SCOPE'='BATCH_SORT')
- ```
+ Sort Scope to be used for the current load. This overrides the Sort Scope of Table.
+ Requirement: Sort Columns must be set when creating the table. If Sort Columns is not set, Sort Scope is always NO_SORT.
+
+ ```
+ OPTIONS('SORT_SCOPE'='BATCH_SORT')
+ ```
+
+ Priority order for choosing Sort Scope is:
+ 1. Load Data Command
+ 2. `CARBON.TABLE.LOAD.SORT.SCOPE.<db>.<table>` session property
+ 3. Table level Sort Scope
+ 4. CARBON.OPTIONS.SORT.SCOPE session property
+ 5. Default Value: NO_SORT
- ##### MULTILINE:
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
index 7e3ea90..307e62d 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
@@ -200,26 +200,26 @@ case class CarbonLoadDataCommand(
* LOAD DATA INPATH 'data.csv' INTO TABLE tableName OPTIONS('sort_scope'='no_sort')
*
* 2. Session property CARBON_TABLE_LOAD_SORT_SCOPE ->
- * SET CARBON.TABLE.LOAD.SORT.SCOPE.database.table=no_sort
- * SET CARBON.TABLE.LOAD.SORT.SCOPE.database.table=batch_sort
* SET CARBON.TABLE.LOAD.SORT.SCOPE.database.table=local_sort
- * SET CARBON.TABLE.LOAD.SORT.SCOPE.database.table=global_sort
*
* 3. Sort Scope provided in TBLPROPERTIES
* 4. Session property CARBON_OPTIONS_SORT_SCOPE
* 5. Default Sort Scope LOAD_SORT_SCOPE
*/
- if (tableProperties.get(CarbonCommonConstants.SORT_COLUMNS) != null &&
- tableProperties.get(CarbonCommonConstants.SORT_SCOPE) == null) {
- // If there are Sort Columns given for the table and Sort Scope is not specified,
- // we will take it as whichever sort scope given or LOCAL_SORT as default
- optionsFinal
- .put(CarbonCommonConstants.SORT_SCOPE,
- carbonProperty
- .getProperty(
- CarbonLoadOptionConstants.CARBON_TABLE_LOAD_SORT_SCOPE + table.getDatabaseName + "." +
- table.getTableName, carbonProperty.getProperty(CarbonCommonConstants.LOAD_SORT_SCOPE,
- SortScopeOptions.getSortScope("LOCAL_SORT").toString)))
+ if (StringUtils.isBlank(tableProperties.get(CarbonCommonConstants.SORT_COLUMNS))) {
+ // If tableProperties.SORT_COLUMNS is null or blank, force NO_SORT
+ optionsFinal.put(CarbonCommonConstants.SORT_SCOPE,
+ SortScopeOptions.SortScope.NO_SORT.name)
+ } else if (StringUtils.isBlank(tableProperties.get(CarbonCommonConstants.SORT_SCOPE))) {
+ // If tableProperties.SORT_COLUMNS is not blank
+ // and tableProperties.SORT_SCOPE is null or blank
+ optionsFinal.put(CarbonCommonConstants.SORT_SCOPE,
+ options.getOrElse(CarbonCommonConstants.SORT_SCOPE,
+ carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_TABLE_LOAD_SORT_SCOPE +
+ table.getDatabaseName + "." + table.getTableName,
+ carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_SORT_SCOPE,
+ carbonProperty.getProperty(CarbonCommonConstants.LOAD_SORT_SCOPE,
+ SortScopeOptions.SortScope.LOCAL_SORT.name)))))
} else {
optionsFinal.put(CarbonCommonConstants.SORT_SCOPE,
options.getOrElse(CarbonCommonConstants.SORT_SCOPE,