You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2019/01/22 12:36:26 UTC
[carbondata] branch master updated: [CARBONDATA-3243] Updated DOC
for No-Sort Compaction and a few Fixes
This is an automated email from the ASF dual-hosted git repository.
kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new aadfbeb [CARBONDATA-3243] Updated DOC for No-Sort Compaction and a few Fixes
aadfbeb is described below
commit aadfbeb6e4d7544f925ef6af66bd9e6ba275ca68
Author: namanrastogi <na...@gmail.com>
AuthorDate: Thu Jan 10 14:40:23 2019 +0530
[CARBONDATA-3243] Updated DOC for No-Sort Compaction and a few Fixes
Updated Doc
Checking SORT_SCOPE in session property CARBON.TABLE.LOAD.SORT.SCOPE in CarbonTable.getSortScope()
Changed default Sort Scope in SortScopeOptions.getSortScope()
Validation for Load Option SORT_SCOPE
Add the iterator to the priority queue only if a record is found in the iterator; otherwise skip it.
This closes #3064
---
.../carbondata/core/constants/SortScopeOptions.java | 2 +-
.../core/metadata/schema/table/CarbonTable.java | 20 ++++++++++++++------
docs/configuration-parameters.md | 1 +
docs/dml-of-carbondata.md | 9 ++++++++-
.../spark/sql/catalyst/CarbonDDLSqlParser.scala | 11 +++++++++++
.../sortdata/SingleThreadFinalSortFilesMerger.java | 2 +-
6 files changed, 36 insertions(+), 9 deletions(-)
diff --git a/core/src/main/java/org/apache/carbondata/core/constants/SortScopeOptions.java b/core/src/main/java/org/apache/carbondata/core/constants/SortScopeOptions.java
index 9225bb4..fe7b4e9 100644
--- a/core/src/main/java/org/apache/carbondata/core/constants/SortScopeOptions.java
+++ b/core/src/main/java/org/apache/carbondata/core/constants/SortScopeOptions.java
@@ -36,7 +36,7 @@ public class SortScopeOptions {
case "NO_SORT":
return SortScope.NO_SORT;
default:
- return SortScope.LOCAL_SORT;
+ return getSortScope(CarbonCommonConstants.LOAD_SORT_SCOPE_DEFAULT);
}
}
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
index f89dd6c..c4adab4 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
@@ -1357,12 +1357,20 @@ public class CarbonTable implements Serializable {
if (getNumberOfSortColumns() == 0) {
return SortScopeOptions.SortScope.NO_SORT;
} else {
- return SortScopeOptions.getSortScope(
- CarbonProperties.getInstance().getProperty(
- CarbonLoadOptionConstants.CARBON_OPTIONS_SORT_SCOPE,
- CarbonProperties.getInstance().getProperty(
- CarbonCommonConstants.LOAD_SORT_SCOPE,
- CarbonCommonConstants.LOAD_SORT_SCOPE_DEFAULT)));
+ // Check SORT_SCOPE in Session Properties first.
+ String sortScopeSessionProp = CarbonProperties.getInstance().getProperty(
+ CarbonLoadOptionConstants.CARBON_TABLE_LOAD_SORT_SCOPE + getDatabaseName() + "."
+ + getTableName());
+ if (null != sortScopeSessionProp) {
+ return SortScopeOptions.getSortScope(sortScopeSessionProp);
+ }
+
+ // If SORT_SCOPE is not found in Session Properties,
+ // then retrieve it from Table.
+ return SortScopeOptions.getSortScope(CarbonProperties.getInstance()
+ .getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_SORT_SCOPE,
+ CarbonProperties.getInstance()
+ .getProperty(CarbonCommonConstants.LOAD_SORT_SCOPE, "LOCAL_SORT")));
}
} else {
return SortScopeOptions.getSortScope(sortScope);
diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md
index 105b768..c7d8152 100644
--- a/docs/configuration-parameters.md
+++ b/docs/configuration-parameters.md
@@ -208,6 +208,7 @@ RESET
| carbon.options.date.format | Specifies the data format of the date columns in the data being loaded |
| carbon.options.timestamp.format | Specifies the timestamp format of the time stamp columns in the data being loaded |
| carbon.options.sort.scope | Specifies how the current data load should be sorted with. **NOTE:** Refer to [Data Loading Configuration](#data-loading-configuration)#carbon.sort.scope for detailed information. |
+| carbon.table.load.sort.scope | Overrides the SORT_SCOPE provided in CREATE TABLE. |
| carbon.options.global.sort.partitions | |
| carbon.options.serialization.null.format | Default Null value representation in the data being loaded. **NOTE:** Refer to [Data Loading Configuration](#data-loading-configuration)#carbon.options.serialization.null.format for detailed information. |
| carbon.query.directQueryOnDataMap.enabled | Specifies whether datamap can be queried directly. This is useful for debugging purposes.**NOTE: **Refer to [Query Configuration](#query-configuration) for detailed information. |
diff --git a/docs/dml-of-carbondata.md b/docs/dml-of-carbondata.md
index c8d72ef..d6e5932 100644
--- a/docs/dml-of-carbondata.md
+++ b/docs/dml-of-carbondata.md
@@ -49,6 +49,7 @@ CarbonData DML statements are documented here,which includes:
| [COMMENTCHAR](#commentchar) | Character used to comment the rows in the input csv file. Those rows will be skipped from processing |
| [HEADER](#header) | Whether the input csv files have header row |
| [FILEHEADER](#fileheader) | If header is not present in the input csv, what is the column names to be used for data read from input csv |
+| [SORT_SCOPE](#sort_scope) | Sort Scope to be used for current load. |
| [MULTILINE](#multiline) | Whether a row data can span across multiple lines. |
| [ESCAPECHAR](#escapechar) | Escape character used to excape the data in input csv file.For eg.,\ is a standard escape character |
| [SKIP_EMPTY_LINE](#skip_empty_line) | Whether empty lines in input csv file should be skipped or loaded as null row |
@@ -106,6 +107,13 @@ CarbonData DML statements are documented here,which includes:
OPTIONS('FILEHEADER'='column1,column2')
```
+ - ##### SORT_SCOPE:
+ Sort Scope to be used for the current load. This overrides the Sort Scope of Table.
+
+ ```
+ OPTIONS('SORT_SCOPE'='BATCH_SORT')
+ ```
+
- ##### MULTILINE:
CSV with new line character in quotes.
@@ -467,4 +475,3 @@ CarbonData DML statements are documented here,which includes:
```
CLEAN FILES FOR TABLE carbon_table
```
-
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
index 523d59c..dc75243 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
@@ -35,6 +35,7 @@ import org.apache.carbondata.common.constants.LoggerAction
import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.exception.InvalidConfigurationException
import org.apache.carbondata.core.metadata.datatype.DataTypes
import org.apache.carbondata.core.metadata.schema.PartitionInfo
import org.apache.carbondata.core.metadata.schema.partition.PartitionType
@@ -1217,6 +1218,16 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
}
}
+ // Validate SORT_SCOPE
+ if (options.exists(_._1.equalsIgnoreCase("SORT_SCOPE"))) {
+ val optionValue: String = options.get("sort_scope").get.head._2
+ if (!CarbonUtil.isValidSortOption(optionValue)) {
+ throw new InvalidConfigurationException(
+ s"Passing invalid SORT_SCOPE '$optionValue', valid SORT_SCOPE are 'NO_SORT'," +
+ s" 'BATCH_SORT', 'LOCAL_SORT' and 'GLOBAL_SORT' ")
+ }
+ }
+
// check for duplicate options
val duplicateOptions = options filter {
case (_, optionList) => optionList.size > 1
diff --git a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SingleThreadFinalSortFilesMerger.java b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SingleThreadFinalSortFilesMerger.java
index d243749..bd9526f 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SingleThreadFinalSortFilesMerger.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SingleThreadFinalSortFilesMerger.java
@@ -135,8 +135,8 @@ public class SingleThreadFinalSortFilesMerger extends CarbonIterator<Object[]> {
noDicAndComplexColumns, sortParameters, measureDataType);
if (inMemorySortTempChunkHolder.hasNext()) {
inMemorySortTempChunkHolder.readRow();
+ recordHolderHeapLocal.add(inMemorySortTempChunkHolder);
}
- recordHolderHeapLocal.add(inMemorySortTempChunkHolder);
}
}