You are viewing a plain text version of this content. A canonical hyperlinked version is available in the original mailing-list archive.
Posted to commits@carbondata.apache.org by ja...@apache.org on 2017/06/27 14:08:07 UTC
[01/12] carbondata git commit: [CARBONDATA-1223] Fixing empty file
creation in batch sort loading
Repository: carbondata
Updated Branches:
refs/heads/encoding_override 30ef14e0d -> 3ecb3ec58
[CARBONDATA-1223] Fixing empty file creation in batch sort loading
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0205fa69
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0205fa69
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0205fa69
Branch: refs/heads/encoding_override
Commit: 0205fa6991e2b1d3f2a807121d15a6eeb9f07714
Parents: 30ef14e
Author: dhatchayani <dh...@gmail.com>
Authored: Fri Jun 23 19:24:47 2017 +0530
Committer: dhatchayani <dh...@gmail.com>
Committed: Fri Jun 23 19:24:51 2017 +0530
----------------------------------------------------------------------
.../UnsafeBatchParallelReadMergeSorterImpl.java | 16 +++++++++---
.../UnsafeSingleThreadFinalSortFilesMerger.java | 26 --------------------
.../steps/DataWriterBatchProcessorStepImpl.java | 18 ++++++++------
3 files changed, 23 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/0205fa69/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/impl/UnsafeBatchParallelReadMergeSorterImpl.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/impl/UnsafeBatchParallelReadMergeSorterImpl.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/impl/UnsafeBatchParallelReadMergeSorterImpl.java
index 20a560d..a8d1eef 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/impl/UnsafeBatchParallelReadMergeSorterImpl.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/impl/UnsafeBatchParallelReadMergeSorterImpl.java
@@ -155,7 +155,7 @@ public class UnsafeBatchParallelReadMergeSorterImpl extends AbstractMergeSorter
sortDataRows.getSortDataRow().addRowBatchWithOutSync(buffer, i);
rowCounter.getAndAdd(i);
if (!sortDataRows.getSortDataRow().canAdd()) {
- sortDataRows.finish();
+ sortDataRows.finish(false);
sortDataRows.createSortDataRows();
}
}
@@ -246,7 +246,7 @@ public class UnsafeBatchParallelReadMergeSorterImpl extends AbstractMergeSorter
return sortDataRow;
}
- public void finish() {
+ public void finish(boolean isFinalAttempt) {
try {
// if the mergerQue is empty and some CarbonDataLoadingException exception has occurred
// then set stop process to true in the finalmerger instance
@@ -254,6 +254,9 @@ public class UnsafeBatchParallelReadMergeSorterImpl extends AbstractMergeSorter
&& threadStatusObserver.getThrowable() != null && threadStatusObserver
.getThrowable() instanceof CarbonDataLoadingException) {
finalMerger.setStopProcess(true);
+ if (isFinalAttempt) {
+ iteratorCount.decrementAndGet();
+ }
mergerQueue.put(finalMerger);
return;
}
@@ -263,6 +266,9 @@ public class UnsafeBatchParallelReadMergeSorterImpl extends AbstractMergeSorter
finalMerger.startFinalMerge(rowPages.toArray(new UnsafeCarbonRowPage[rowPages.size()]),
unsafeIntermediateFileMerger.getMergedPages());
unsafeIntermediateFileMerger.close();
+ if (isFinalAttempt) {
+ iteratorCount.decrementAndGet();
+ }
mergerQueue.put(finalMerger);
sortDataRow = null;
unsafeIntermediateFileMerger = null;
@@ -284,8 +290,10 @@ public class UnsafeBatchParallelReadMergeSorterImpl extends AbstractMergeSorter
public void finishThread() {
synchronized (lock) {
- if (iteratorCount.decrementAndGet() <= 0) {
- finish();
+ if (iteratorCount.get() <= 1) {
+ finish(true);
+ } else {
+ iteratorCount.decrementAndGet();
}
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/0205fa69/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/merger/UnsafeSingleThreadFinalSortFilesMerger.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/merger/UnsafeSingleThreadFinalSortFilesMerger.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/merger/UnsafeSingleThreadFinalSortFilesMerger.java
index acb976f..eb7af47 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/merger/UnsafeSingleThreadFinalSortFilesMerger.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/merger/UnsafeSingleThreadFinalSortFilesMerger.java
@@ -55,25 +55,6 @@ public class UnsafeSingleThreadFinalSortFilesMerger extends CarbonIterator<Objec
private SortParameters parameters;
/**
- * number of measures
- */
- private int measureCount;
-
- /**
- * number of dimensionCount
- */
- private int dimensionCount;
-
- /**
- * number of complexDimensionCount
- */
- private int noDictionaryCount;
-
- private int complexDimensionCount;
-
- private boolean[] isNoDictionaryDimensionColumn;
-
- /**
* tempFileLocation
*/
private String tempFileLocation;
@@ -85,13 +66,6 @@ public class UnsafeSingleThreadFinalSortFilesMerger extends CarbonIterator<Objec
public UnsafeSingleThreadFinalSortFilesMerger(SortParameters parameters,
String tempFileLocation) {
this.parameters = parameters;
- // set measure and dimension count
- this.measureCount = parameters.getMeasureColCount();
- this.dimensionCount = parameters.getDimColCount();
- this.complexDimensionCount = parameters.getComplexDimColCount();
-
- this.noDictionaryCount = parameters.getNoDictionaryCount();
- this.isNoDictionaryDimensionColumn = parameters.getNoDictionaryDimnesionColumn();
this.tempFileLocation = tempFileLocation;
this.tableName = parameters.getTableName();
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/0205fa69/processing/src/main/java/org/apache/carbondata/processing/newflow/steps/DataWriterBatchProcessorStepImpl.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/steps/DataWriterBatchProcessorStepImpl.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/steps/DataWriterBatchProcessorStepImpl.java
index d58835c..46c1020 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/newflow/steps/DataWriterBatchProcessorStepImpl.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/steps/DataWriterBatchProcessorStepImpl.java
@@ -82,13 +82,16 @@ public class DataWriterBatchProcessorStepImpl extends AbstractDataLoadProcessorS
int k = 0;
while (iterator.hasNext()) {
CarbonRowBatch next = iterator.next();
- CarbonFactDataHandlerModel model = CarbonFactDataHandlerModel
- .createCarbonFactDataHandlerModel(configuration, storeLocation, i, k++);
- CarbonFactHandler dataHandler = CarbonFactHandlerFactory
- .createCarbonFactHandler(model, CarbonFactHandlerFactory.FactHandlerType.COLUMNAR);
- dataHandler.initialise();
- processBatch(next, dataHandler);
- finish(tableName, dataHandler);
+ // If no rows from merge sorter, then don't create a file in fact column handler
+ if (next.hasNext()) {
+ CarbonFactDataHandlerModel model = CarbonFactDataHandlerModel
+ .createCarbonFactDataHandlerModel(configuration, storeLocation, i, k++);
+ CarbonFactHandler dataHandler = CarbonFactHandlerFactory
+ .createCarbonFactHandler(model, CarbonFactHandlerFactory.FactHandlerType.COLUMNAR);
+ dataHandler.initialise();
+ processBatch(next, dataHandler);
+ finish(tableName, dataHandler);
+ }
}
i++;
}
@@ -137,6 +140,7 @@ public class DataWriterBatchProcessorStepImpl extends AbstractDataLoadProcessorS
dataHandler.addDataToStore(row);
batchSize++;
}
+ batch.close();
rowCounter.getAndAdd(batchSize);
}
[08/12] carbondata git commit: [CARBONDATA-1227] Remove useless
TableCreator This closes #1095
Posted by ja...@apache.org.
[CARBONDATA-1227] Remove useless TableCreator This closes #1095
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3dec25b7
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3dec25b7
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3dec25b7
Branch: refs/heads/encoding_override
Commit: 3dec25b7c7d9f32f9dd887d11b7a854709b20a9f
Parents: 22514b2 beee6de
Author: jackylk <ja...@huawei.com>
Authored: Mon Jun 26 20:46:03 2017 +0800
Committer: jackylk <ja...@huawei.com>
Committed: Mon Jun 26 20:46:03 2017 +0800
----------------------------------------------------------------------
.../org/apache/spark/sql/TableCreator.scala | 513 -------------------
1 file changed, 513 deletions(-)
----------------------------------------------------------------------
[05/12] carbondata git commit: delete incubating in docs
Posted by ja...@apache.org.
delete incubating in docs
delete incubating in docs
update docs
change version to x.x.x to skip version change in the future
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/9f855f0e
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/9f855f0e
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/9f855f0e
Branch: refs/heads/encoding_override
Commit: 9f855f0e0cb66f88b37e980bfacc24fa01922abe
Parents: 8803da2
Author: chenerlu <ch...@huawei.com>
Authored: Tue Jun 20 14:51:34 2017 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Sat Jun 24 17:43:45 2017 +0800
----------------------------------------------------------------------
docs/installation-guide.md | 6 +++---
integration/presto/README.md | 8 ++++----
2 files changed, 7 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/9f855f0e/docs/installation-guide.md
----------------------------------------------------------------------
diff --git a/docs/installation-guide.md b/docs/installation-guide.md
index d9f27dd..a0fc690 100644
--- a/docs/installation-guide.md
+++ b/docs/installation-guide.md
@@ -150,7 +150,7 @@ $SPARK_HOME/carbonlib/$CARBON_ASSEMBLY_JAR <carbon_store_path>
| Parameter | Description | Example |
|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------|
-| CARBON_ASSEMBLY_JAR | CarbonData assembly jar name present in the `$SPARK_HOME/carbonlib/` folder. | carbondata_2.10-0.1.0-incubating-SNAPSHOT-shade-hadoop2.7.2.jar |
+| CARBON_ASSEMBLY_JAR | CarbonData assembly jar name present in the `$SPARK_HOME/carbonlib/` folder. | carbondata_2.xx-x.x.x-SNAPSHOT-shade-hadoop2.7.2.jar |
| carbon_store_path | This is a parameter to the CarbonThriftServer class. This a HDFS path where CarbonData files will be kept. Strongly Recommended to put same as carbon.storelocation parameter of carbon.properties. | `hdfs://<host_name>:port/user/hive/warehouse/carbon.store` |
**Examples**
@@ -162,7 +162,7 @@ $SPARK_HOME/carbonlib/$CARBON_ASSEMBLY_JAR <carbon_store_path>
--conf spark.sql.hive.thriftServer.singleSession=true
--class org.apache.carbondata.spark.thriftserver.CarbonThriftServer
$SPARK_HOME/carbonlib
-/carbondata_2.10-0.1.0-incubating-SNAPSHOT-shade-hadoop2.7.2.jar
+/carbondata_2.xx-x.x.x-SNAPSHOT-shade-hadoop2.7.2.jar
hdfs://<host_name>:port/user/hive/warehouse/carbon.store
```
@@ -174,7 +174,7 @@ hdfs://<host_name>:port/user/hive/warehouse/carbon.store
--num-executors 3 --driver-memory 20g --executor-memory 250g
--executor-cores 32
/srv/OSCON/BigData/HACluster/install/spark/sparkJdbc/lib
-/carbondata_2.10-0.1.0-incubating-SNAPSHOT-shade-hadoop2.7.2.jar
+/carbondata_2.xx-x.x.x-SNAPSHOT-shade-hadoop2.7.2.jar
hdfs://<host_name>:port/user/hive/warehouse/carbon.store
```
http://git-wip-us.apache.org/repos/asf/carbondata/blob/9f855f0e/integration/presto/README.md
----------------------------------------------------------------------
diff --git a/integration/presto/README.md b/integration/presto/README.md
index 8a7cd13..9935478 100644
--- a/integration/presto/README.md
+++ b/integration/presto/README.md
@@ -61,17 +61,17 @@ Please follow the below steps to query carbondata in presto
First:compile carbondata-presto integration module
```
- $ git clone https://github.com/apache/incubator-carbondata
- $ cd incubator-carbondata/integration/presto
+ $ git clone https://github.com/apache/carbondata
+ $ cd carbondata/integration/presto
$ mvn clean package
```
Second:create one folder "carbondata" under ./presto-server-0.166/plugin
- Third:copy all jar from ./incubator-carbondata/integration/presto/target/carbondata-presto-1.1.0-incubating-SNAPSHOT
+ Third:copy all jar from ./carbondata/integration/presto/target/carbondata-presto-x.x.x-SNAPSHOT
to ./presto-server-0.166/plugin/carbondata
### Generate CarbonData file
-Please refer to quick start : https://github.com/apache/incubator-carbondata/blob/master/docs/quick-start-guide.md
+Please refer to quick start : https://github.com/apache/carbondata/blob/master/docs/quick-start-guide.md
### Query carbondata in CLI of presto
* Download presto-cli-0.166-executable.jar
[07/12] carbondata git commit: just remove table TableCreator because
nobody will call it
Posted by ja...@apache.org.
just remove table TableCreator because nobody will call it
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/beee6dec
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/beee6dec
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/beee6dec
Branch: refs/heads/encoding_override
Commit: beee6decea29ff5052956eda0580b8a4b77480fc
Parents: 22514b2
Author: chenerlu <ch...@huawei.com>
Authored: Mon Jun 26 11:33:50 2017 +0800
Committer: chenerlu <ch...@huawei.com>
Committed: Mon Jun 26 11:33:50 2017 +0800
----------------------------------------------------------------------
.../org/apache/spark/sql/TableCreator.scala | 513 -------------------
1 file changed, 513 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/beee6dec/integration/spark2/src/main/scala/org/apache/spark/sql/TableCreator.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/TableCreator.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/TableCreator.scala
deleted file mode 100644
index 3dd05b9..0000000
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/TableCreator.scala
+++ /dev/null
@@ -1,513 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql
-
-import java.util.regex.{Matcher, Pattern}
-
-import scala.collection.mutable.{LinkedHashSet, Map}
-
-import org.apache.spark.sql.execution.command._
-
-import org.apache.carbondata.core.constants.CarbonCommonConstants
-import org.apache.carbondata.core.metadata.datatype.DataType
-import org.apache.carbondata.core.util.{CarbonUtil, DataTypeUtil}
-import org.apache.carbondata.spark.exception.MalformedCarbonCommandException
-import org.apache.carbondata.spark.util.CommonUtil
-
-object TableCreator {
-
- // detects whether complex dimension is part of dictionary_exclude
- def isComplexDimDictionaryExclude(dimensionDataType: String): Boolean = {
- val dimensionTypes = Array("array", "arraytype", "struct", "structtype")
- dimensionTypes.exists(dimensionType => dimensionType.equalsIgnoreCase(dimensionDataType))
- }
-
- // detects whether datatype is part of dictionary_exclude
- def isDataTypeSupportedForDictionary_Exclude(columnDataType: String): Boolean = {
- val dataTypes = Array("string", "stringtype")
- dataTypes.exists(dataType => dataType.equalsIgnoreCase(columnDataType))
- }
-
- // detect dimention data type
- def isDetectAsDimentionDatatype(dimensionDatatype: String): Boolean = {
- val dimensionTypes =
- Array("string", "stringtype", "array", "arraytype", "struct",
- "structtype", "timestamp", "timestamptype", "date", "datetype")
- dimensionTypes.exists(dimensionType => dimensionType.equalsIgnoreCase(dimensionDatatype))
- }
-
- protected def extractDimAndMsrFields(fields: Seq[Field],
- tableProperties: Map[String, String]): (Seq[Field], Seq[Field], Seq[String], Seq[String]) = {
- var dimFields: LinkedHashSet[Field] = LinkedHashSet[Field]()
- var msrFields: Seq[Field] = Seq[Field]()
- var dictExcludeCols: Array[String] = Array[String]()
- var noDictionaryDims: Seq[String] = Seq[String]()
- var dictIncludeCols: Seq[String] = Seq[String]()
-
- // All columns in sortkey should be there in create table cols
- val sortKeyOption = tableProperties.get(CarbonCommonConstants.SORT_COLUMNS)
- var sortKeyDimsTmp: Seq[String] = Seq[String]()
- val sortKeyString: String = if (sortKeyOption.isDefined) {
- CarbonUtil.unquoteChar(sortKeyOption.get) trim
- } else {
- ""
- }
- if (!sortKeyString.isEmpty) {
- val sortKey = sortKeyString.split(',').map(_.trim)
- sortKey.foreach { column =>
- if (!fields.exists(x => x.column.equalsIgnoreCase(column))) {
- val errormsg = "sort_columns: " + column +
- " does not exist in table. Please check create table statement."
- throw new MalformedCarbonCommandException(errormsg)
- } else {
- val dataType = fields.find(x =>
- x.column.equalsIgnoreCase(column)).get.dataType.get
- if (isComplexDimDictionaryExclude(dataType)) {
- val errormsg = "sort_columns is unsupported for complex datatype column: " + column
- throw new MalformedCarbonCommandException(errormsg)
- }
- }
- }
-
- sortKey.foreach { dimension =>
- if (!sortKeyDimsTmp.exists(dimension.equalsIgnoreCase(_))) {
- fields.foreach { field =>
- if (field.column.equalsIgnoreCase(dimension)) {
- sortKeyDimsTmp :+= field.column
- }
- }
- }
- }
- }
-
- // All excluded cols should be there in create table cols
- if (tableProperties.get(CarbonCommonConstants.DICTIONARY_EXCLUDE).isDefined) {
- dictExcludeCols =
- tableProperties.get(CarbonCommonConstants.DICTIONARY_EXCLUDE).get.split(',').map(_.trim)
- dictExcludeCols
- .foreach { dictExcludeCol =>
- if (!fields.exists(field => field.column.equalsIgnoreCase(dictExcludeCol))) {
- val errormsg = "DICTIONARY_EXCLUDE column: " + dictExcludeCol +
- " does not exist in table. Please check create table statement."
- throw new MalformedCarbonCommandException(errormsg)
- } else {
- val dataType = fields.find(field =>
- field.column.equalsIgnoreCase(dictExcludeCol)).get.dataType.get
- if (isComplexDimDictionaryExclude(dataType)) {
- val errormsg = "DICTIONARY_EXCLUDE is unsupported for complex datatype column: " +
- dictExcludeCol
- throw new MalformedCarbonCommandException(errormsg)
- } else if (!isDataTypeSupportedForDictionary_Exclude(dataType)) {
- val errorMsg = "DICTIONARY_EXCLUDE is unsupported for " + dataType.toLowerCase() +
- " data type column: " + dictExcludeCol
- throw new MalformedCarbonCommandException(errorMsg)
- }
- }
- }
- }
- // All included cols should be there in create table cols
- if (tableProperties.get(CarbonCommonConstants.DICTIONARY_INCLUDE).isDefined) {
- dictIncludeCols =
- tableProperties(CarbonCommonConstants.DICTIONARY_INCLUDE).split(",").map(_.trim)
- dictIncludeCols.foreach { distIncludeCol =>
- if (!fields.exists(field => field.column.equalsIgnoreCase(distIncludeCol.trim))) {
- val errormsg = "DICTIONARY_INCLUDE column: " + distIncludeCol.trim +
- " does not exist in table. Please check create table statement."
- throw new MalformedCarbonCommandException(errormsg)
- }
- }
- }
-
- // include cols should not contain exclude cols
- dictExcludeCols.foreach { dicExcludeCol =>
- if (dictIncludeCols.exists(col => col.equalsIgnoreCase(dicExcludeCol))) {
- val errormsg = "DICTIONARY_EXCLUDE can not contain the same column: " + dicExcludeCol +
- " with DICTIONARY_INCLUDE. Please check create table statement."
- throw new MalformedCarbonCommandException(errormsg)
- }
- }
-
- // by default consider all String cols as dims and if any dictionary exclude is present then
- // add it to noDictionaryDims list. consider all dictionary excludes/include cols as dims
- fields.foreach { field =>
- if (dictExcludeCols.toSeq.exists(col => col.equalsIgnoreCase(field.column))) {
- val dataType = DataTypeUtil.getDataType(field.dataType.get.toUpperCase())
- if (dataType != DataType.TIMESTAMP && dataType != DataType.DATE) {
- noDictionaryDims :+= field.column
- }
- dimFields += field
- } else if (dictIncludeCols.exists(col => col.equalsIgnoreCase(field.column))) {
- dimFields += field
- } else if (isDetectAsDimentionDatatype(field.dataType.get)) {
- dimFields += field
- } else if (sortKeyDimsTmp.exists(x => x.equalsIgnoreCase(field.column))) {
- noDictionaryDims :+= field.column
- dimFields += field
- } else {
- msrFields :+= field
- }
- }
-
- var sortKeyDims = sortKeyDimsTmp
- if (sortKeyOption.isEmpty) {
- // if SORT_COLUMNS was not defined, add all dimension to SORT_COLUMNS.
- dimFields.foreach { field =>
- if (!isComplexDimDictionaryExclude(field.dataType.get)) {
- sortKeyDims :+= field.column
- }
- }
- }
- if (sortKeyDims.isEmpty) {
- // no SORT_COLUMNS
- tableProperties.put(CarbonCommonConstants.SORT_COLUMNS, "")
- } else {
- tableProperties.put(CarbonCommonConstants.SORT_COLUMNS, sortKeyDims.mkString(","))
- }
- (dimFields.toSeq, msrFields, noDictionaryDims, sortKeyDims)
- }
-
- def getKey(parentColumnName: Option[String],
- columnName: String): (String, String) = {
- if (parentColumnName.isDefined) {
- if (columnName == "val") {
- (parentColumnName.get, parentColumnName.get + "." + columnName)
- } else {
- (parentColumnName.get + "." + columnName, parentColumnName.get + "." + columnName)
- }
- } else {
- (columnName, columnName)
- }
- }
-
- protected def fillColumnProperty(
- parentColumnName: Option[String],
- columnName: String,
- tableProperties: Map[String, String],
- colPropMap: java.util.HashMap[String, java.util.List[ColumnProperty]]) {
- val (tblPropKey, colProKey) = getKey(parentColumnName, columnName)
- val colProps = CommonUtil.getColumnProperties(tblPropKey, tableProperties)
- if (colProps.isDefined) {
- colPropMap.put(colProKey, colProps.get)
- }
- }
-
- protected def fillAllChildrenColumnProperty(
- parent: String,
- fieldChildren: Option[List[Field]],
- tableProperties: Map[String, String],
- colPropMap: java.util.HashMap[String, java.util.List[ColumnProperty]]) {
- fieldChildren.foreach { fields =>
- fields.foreach { field =>
- fillColumnProperty(Some(parent), field.column, tableProperties, colPropMap)
- }
- }
- }
-
- protected def extractColumnProperties(fields: Seq[Field], tableProperties: Map[String, String]):
- java.util.Map[String, java.util.List[ColumnProperty]] = {
- val colPropMap = new java.util.HashMap[String, java.util.List[ColumnProperty]]()
- fields.foreach { field =>
- if (field.children.isDefined && field.children.get != null) {
- fillAllChildrenColumnProperty(field.column, field.children, tableProperties, colPropMap)
- } else {
- fillColumnProperty(None, field.column, tableProperties, colPropMap)
- }
- }
- colPropMap
- }
-
- def rearrangedColumnGroup(colGroup: String, dims: Seq[Field]): String = {
- // if columns in column group is not in schema order than arrange it in schema order
- var colGrpFieldIndx: Seq[Int] = Seq[Int]()
- colGroup.split(',').map(_.trim).foreach { col =>
- dims.zipWithIndex.foreach { dim =>
- if (dim._1.column.equalsIgnoreCase(col)) {
- colGrpFieldIndx :+= dim._2
- }
- }
- }
- // sort it
- colGrpFieldIndx = colGrpFieldIndx.sorted
- // check if columns in column group is in schema order
- if (!checkIfInSequence(colGrpFieldIndx)) {
- throw new MalformedCarbonCommandException("Invalid column group:" + colGroup)
- }
- def checkIfInSequence(colGrpFieldIndx: Seq[Int]): Boolean = {
- for (i <- 0 until (colGrpFieldIndx.length - 1)) {
- if ((colGrpFieldIndx(i + 1) - colGrpFieldIndx(i)) != 1) {
- throw new MalformedCarbonCommandException(
- "Invalid column group,column in group should be contiguous as per schema.")
- }
- }
- true
- }
- val colGrpNames: StringBuilder = StringBuilder.newBuilder
- for (i <- colGrpFieldIndx.indices) {
- colGrpNames.append(dims(colGrpFieldIndx(i)).column)
- if (i < (colGrpFieldIndx.length - 1)) {
- colGrpNames.append(",")
- }
- }
- colGrpNames.toString()
- }
-
- /**
- * Extract the column groups configuration from table properties.
- * Based on this Row groups of fields will be determined.
- *
- * @param tableProperties
- * @return
- */
- protected def updateColumnGroupsInField(tableProperties: Map[String, String],
- noDictionaryDims: Seq[String],
- msrs: Seq[Field],
- dims: Seq[Field]): Seq[String] = {
- if (tableProperties.get(CarbonCommonConstants.COLUMN_GROUPS).isDefined) {
-
- var splittedColGrps: Seq[String] = Seq[String]()
- val nonSplitCols: String = tableProperties(CarbonCommonConstants.COLUMN_GROUPS)
-
- // row groups will be specified in table properties like -> "(col1,col2),(col3,col4)"
- // here first splitting the value by () . so that the above will be splitted into 2 strings.
- // [col1,col2] [col3,col4]
- val m: Matcher = Pattern.compile("\\(([^)]+)\\)").matcher(nonSplitCols)
- while (m.find()) {
- val oneGroup: String = m.group(1)
- CommonUtil.validateColumnGroup(oneGroup, noDictionaryDims, msrs, splittedColGrps, dims)
- val arrangedColGrp = rearrangedColumnGroup(oneGroup, dims)
- splittedColGrps :+= arrangedColGrp
- }
- // This will be furthur handled.
- CommonUtil.arrangeColGrpsInSchemaOrder(splittedColGrps, dims)
- } else {
- null
- }
- }
-
- private def reorderDimensions(dims: Seq[Field]): Seq[Field] = {
- var complexDimensions: Seq[Field] = Seq()
- var dimensions: Seq[Field] = Seq()
- dims.foreach { dimension =>
- dimension.dataType.getOrElse("NIL") match {
- case "Array" => complexDimensions = complexDimensions :+ dimension
- case "Struct" => complexDimensions = complexDimensions :+ dimension
- case _ => dimensions = dimensions :+ dimension
- }
- }
- dimensions ++ complexDimensions
- }
-
- /**
- * This will extract the no inverted columns fields.
- * By default all dimensions use inverted index.
- *
- * @param fields
- * @param tableProperties
- * @return
- */
- protected def extractNoInvertedIndexColumns(fields: Seq[Field],
- tableProperties: Map[String, String]):
- Seq[String] = {
- // check whether the column name is in fields
- var noInvertedIdxColsProps: Array[String] = Array[String]()
- var noInvertedIdxCols: Seq[String] = Seq[String]()
-
- if (tableProperties.get("NO_INVERTED_INDEX").isDefined) {
- noInvertedIdxColsProps =
- tableProperties("NO_INVERTED_INDEX").split(',').map(_.trim)
- noInvertedIdxColsProps.foreach { noInvertedIdxColProp =>
- if (!fields.exists(field => field.column.equalsIgnoreCase(noInvertedIdxColProp))) {
- val errormsg = "NO_INVERTED_INDEX column: " + noInvertedIdxColProp +
- " does not exist in table. Please check create table statement."
- throw new MalformedCarbonCommandException(errormsg)
- }
- }
- }
- // check duplicate columns and only 1 col left
- val distinctCols = noInvertedIdxColsProps.toSet
- // extract the no inverted index columns
- fields.foreach(field => {
- if (distinctCols.exists(col => col.equalsIgnoreCase(field.column))) {
- noInvertedIdxCols :+= field.column
- }
- }
- )
- noInvertedIdxCols
- }
-
- private def normalizeType(field: Field): Field = {
- val dataType = field.dataType.getOrElse("NIL")
- dataType.toLowerCase match {
- case "string" => Field(field.column, Some("String"), field.name, Some(null), field.parent,
- field.storeType
- )
- case "integer" | "int" => Field(field.column, Some("Integer"), field.name, Some(null),
- field.parent, field.storeType
- )
- case "long" => Field(field.column, Some("Long"), field.name, Some(null), field.parent,
- field.storeType
- )
- case "double" => Field(field.column, Some("Double"), field.name, Some(null), field.parent,
- field.storeType
- )
- case "timestamp" => Field(field.column, Some("Timestamp"), field.name, Some(null),
- field.parent, field.storeType
- )
- case "numeric" => Field(field.column, Some("Numeric"), field.name, Some(null), field.parent,
- field.storeType
- )
- case "array" => Field(field.column, Some("Array"), field.name,
- field.children.map(f => f.map(normalizeType)),
- field.parent, field.storeType
- )
- case "struct" => Field(field.column, Some("Struct"), field.name,
- field.children.map(f => f.map(normalizeType)),
- field.parent, field.storeType
- )
- case "bigint" => Field(field.column, Some("BigInt"), field.name, Some(null), field.parent,
- field.storeType
- )
- case "decimal" => Field(field.column, Some("Decimal"), field.name, Some(null), field.parent,
- field.storeType, field.schemaOrdinal, field.precision, field.scale)
- // checking if the nested data type contains the child type as decimal(10,0),
- // if it is present then extracting the precision and scale. resetting the data type
- // with Decimal.
- case _ if dataType.startsWith("decimal") =>
- val (precision, scale) = getScaleAndPrecision(dataType)
- Field(field.column,
- Some("Decimal"),
- field.name,
- Some(null),
- field.parent,
- field.storeType, precision,
- scale
- )
- case _ =>
- field
- }
- }
-
- private def appendParentForEachChild(field: Field, parentName: String): Field = {
- field.dataType.getOrElse("NIL") match {
- case "String" => Field(parentName + "." + field.column, Some("String"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Integer" => Field(parentName + "." + field.column, Some("Integer"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Long" => Field(parentName + "." + field.column, Some("Long"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Double" => Field(parentName + "." + field.column, Some("Double"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Timestamp" => Field(parentName + "." + field.column, Some("Timestamp"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Numeric" => Field(parentName + "." + field.column, Some("Numeric"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Array" => Field(parentName + "." + field.column, Some("Array"),
- Some(parentName + "." + field.name.getOrElse(None)),
- field.children
- .map(f => f.map(appendParentForEachChild(_, parentName + "." + field.column))),
- parentName)
- case "Struct" => Field(parentName + "." + field.column, Some("Struct"),
- Some(parentName + "." + field.name.getOrElse(None)),
- field.children
- .map(f => f.map(appendParentForEachChild(_, parentName + "." + field.column))),
- parentName)
- case "BigInt" => Field(parentName + "." + field.column, Some("BigInt"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName)
- case "Decimal" => Field(parentName + "." + field.column, Some("Decimal"),
- Some(parentName + "." + field.name.getOrElse(None)), Some(null), parentName,
- field.storeType, field.precision, field.scale)
- case _ => field
- }
- }
-
- private def addParent(field: Field): Field = {
- field.dataType.getOrElse("NIL") match {
- case "Array" => Field(field.column, Some("Array"), field.name,
- field.children.map(f => f.map(appendParentForEachChild(_, field.column))), field.parent,
- field.storeType)
- case "Struct" => Field(field.column, Some("Struct"), field.name,
- field.children.map(f => f.map(appendParentForEachChild(_, field.column))), field.parent,
- field.storeType)
- case _ => field
- }
- }
-
- def getScaleAndPrecision(dataType: String): (Int, Int) = {
- val m: Matcher = Pattern.compile("^decimal\\(([^)]+)\\)").matcher(dataType)
- m.find()
- val matchedString: String = m.group(1)
- val scaleAndPrecision = matchedString.split(",")
- (Integer.parseInt(scaleAndPrecision(0).trim), Integer.parseInt(scaleAndPrecision(1).trim))
- }
-
- def prepareTableModel(ifNotExistPresent: Boolean, dbName: Option[String]
- , tableName: String, fields: Seq[Field],
- partitionCols: Seq[PartitionerField],
- bucketFields: Option[BucketFields],
- tableProperties: Map[String, String]): TableModel
- = {
-
- fields.zipWithIndex.foreach { x =>
- x._1.schemaOrdinal = x._2
- }
- val (dims, msrs, noDictionaryDims, sortKeyDims) = extractDimAndMsrFields(
- fields, tableProperties)
- if (dims.isEmpty) {
- throw new MalformedCarbonCommandException(
- s"Table ${dbName.getOrElse(CarbonCommonConstants.DATABASE_DEFAULT_NAME)}.$tableName " +
- "can not be created without key columns. Please use DICTIONARY_INCLUDE or " +
- "DICTIONARY_EXCLUDE to set at least one key column " +
- "if all specified columns are numeric types")
- }
-
- // column properties
- val colProps = extractColumnProperties(fields, tableProperties)
- // get column groups configuration from table properties.
- val groupCols: Seq[String] = updateColumnGroupsInField(tableProperties,
- noDictionaryDims, msrs, dims)
- if (groupCols != null) {
- throw new MalformedCarbonCommandException(
- s"${CarbonCommonConstants.COLUMN_GROUPS} is deprecated")
- }
-
- // get no inverted index columns from table properties.
- val noInvertedIdxCols = extractNoInvertedIndexColumns(fields, tableProperties)
-
- val partitionInfo = None
-
- // validate the tableBlockSize from table properties
- CommonUtil.validateTableBlockSize(tableProperties)
-
- TableModel(
- ifNotExistPresent,
- dbName.getOrElse(CarbonCommonConstants.DATABASE_DEFAULT_NAME),
- dbName,
- tableName,
- tableProperties,
- reorderDimensions(dims.map(f => normalizeType(f)).map(f => addParent(f))),
- msrs.map(f => normalizeType(f)),
- Option(sortKeyDims),
- Option(noDictionaryDims),
- Option(noInvertedIdxCols),
- groupCols,
- Some(colProps),
- bucketFields: Option[BucketFields],
- partitionInfo)
- }
-
-}
[03/12] carbondata git commit: Update hive-guide.md
Posted by ja...@apache.org.
Update hive-guide.md
Correct some invalid steps.
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/22b34773
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/22b34773
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/22b34773
Branch: refs/heads/encoding_override
Commit: 22b34773f1fc9176718780b6095f80f1647b375b
Parents: 4a6f57e
Author: Liang Chen <ch...@apache.org>
Authored: Sat Jun 24 17:24:08 2017 +0800
Committer: GitHub <no...@github.com>
Committed: Sat Jun 24 17:24:08 2017 +0800
----------------------------------------------------------------------
integration/hive/hive-guide.md | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/22b34773/integration/hive/hive-guide.md
----------------------------------------------------------------------
diff --git a/integration/hive/hive-guide.md b/integration/hive/hive-guide.md
index 4d3c740..9b72443 100644
--- a/integration/hive/hive-guide.md
+++ b/integration/hive/hive-guide.md
@@ -77,19 +77,20 @@ copy snappy-java-xxx.jar from "./<SPARK_HOME>/jars/" to "./Library/Java/Extensio
export HADOOP_OPTS="-Dorg.xerial.snappy.lib.path=/Library/Java/Extensions -Dorg.xerial.snappy.lib.name=libsnappyjava.jnilib -Dorg.xerial.snappy.tempdir=/Users/apple/DEMO/tmp"
```
-### Alter schema in Hive
+### Start hive client
$HIVE_HOME/bin/hive
+### Initialize schema in hive
```
+create table in hive:
+CREATE TABLE IF NOT EXISTS hive_carbon(id int, name string, scale decimal, country string, salary double) row format delimited fields terminated by ',' stored as textfile;
+
alter table hive_carbon set FILEFORMAT
INPUTFORMAT "org.apache.carbondata.hive.MapredCarbonInputFormat"
OUTPUTFORMAT "org.apache.carbondata.hive.MapredCarbonOutputFormat"
SERDE "org.apache.carbondata.hive.CarbonHiveSerDe";
alter table hive_carbon set LOCATION '<hdfs store path>/carbon/store/default/hive_carbon';
-alter table hive_carbon change col id INT;
-alter table hive_carbon add columns(name string, scale decimal(10, 2), country string, salary double);
-
```
### Query data from hive table
[11/12] carbondata git commit: fix double issue
Posted by ja...@apache.org.
fix double issue
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/434f32dd
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/434f32dd
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/434f32dd
Branch: refs/heads/encoding_override
Commit: 434f32ddbbd56cf59cbb8ca54229ad17451d2491
Parents: 15acd9d
Author: QiangCai <qi...@qq.com>
Authored: Sat Jun 24 18:38:35 2017 +0800
Committer: QiangCai <qi...@qq.com>
Committed: Mon Jun 26 22:23:17 2017 +0800
----------------------------------------------------------------------
.../page/encoding/DefaultEncodingStrategy.java | 11 +-
.../encoding/UpscaleDeltaFloatingCodec.java | 198 -------------------
.../page/encoding/UpscaleFloatingCodec.java | 34 ++--
.../primitiveTypes/DoubleDataTypeTestCase.scala | 91 +++++++++
4 files changed, 115 insertions(+), 219 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/434f32dd/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingStrategy.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingStrategy.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingStrategy.java
index 94e1cea..f8e43fc 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingStrategy.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingStrategy.java
@@ -131,15 +131,8 @@ public class DefaultEncodingStrategy extends EncodingStrategy {
} else {
// double
DataType upscaleAdaptiveDataType = fitDataType(Math.pow(10, decimal) * absMaxValue, decimal);
- DataType upscaleDiffDataType =
- fitDataType(Math.pow(10, decimal) * (maxValue - minValue), decimal);
- if (upscaleAdaptiveDataType.getSizeInBytes() <= upscaleDiffDataType.getSizeInBytes()) {
- return UpscaleFloatingCodec.newInstance(
- srcDataType, upscaleAdaptiveDataType, stats, compressor);
- } else {
- return UpscaleDeltaFloatingCodec.newInstance(
- srcDataType, upscaleDiffDataType, stats, compressor);
- }
+ return UpscaleFloatingCodec.newInstance(
+ srcDataType, upscaleAdaptiveDataType, stats, compressor);
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/434f32dd/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/UpscaleDeltaFloatingCodec.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/UpscaleDeltaFloatingCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/UpscaleDeltaFloatingCodec.java
deleted file mode 100644
index e53346b..0000000
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/UpscaleDeltaFloatingCodec.java
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.carbondata.core.datastore.page.encoding;
-
-import java.math.BigDecimal;
-
-import org.apache.carbondata.core.datastore.compression.Compressor;
-import org.apache.carbondata.core.datastore.page.ColumnPage;
-import org.apache.carbondata.core.datastore.page.LazyColumnPage;
-import org.apache.carbondata.core.datastore.page.PrimitiveCodec;
-import org.apache.carbondata.core.datastore.page.statistics.ColumnPageStatsVO;
-import org.apache.carbondata.core.memory.MemoryException;
-import org.apache.carbondata.core.metadata.datatype.DataType;
-
-/**
- * Codec for floating point (float, double) data type page.
- * This codec will upscale (multiple page value by decimal) to integer value,
- * and do type casting to make storage minimum.
- */
-public class UpscaleDeltaFloatingCodec extends AdaptiveCompressionCodec {
-
- private ColumnPage encodedPage;
-
- private BigDecimal max;
- private double factor;
-
- public static ColumnPageCodec newInstance(DataType srcDataType, DataType targetDataType,
- ColumnPageStatsVO stats, Compressor compressor) {
- return new UpscaleDeltaFloatingCodec(srcDataType, targetDataType, stats, compressor);
- }
-
- private UpscaleDeltaFloatingCodec(DataType srcDataType, DataType targetDataType,
- ColumnPageStatsVO stats, Compressor compressor) {
- super(srcDataType, targetDataType, stats, compressor);
- this.max = BigDecimal.valueOf((double) stats.getMax());
- this.factor = Math.pow(10, stats.getDecimal());
- }
-
- @Override
- public String getName() {
- return "UpscaleDeltaFloatingCodec";
- }
-
- @Override
- public byte[] encode(ColumnPage input) throws MemoryException {
- if (srcDataType.equals(targetDataType)) {
- return input.compress(compressor);
- } else {
- encodedPage = ColumnPage.newPage(targetDataType, input.getPageSize());
- input.encode(codec);
- byte[] result = encodedPage.compress(compressor);
- encodedPage.freeMemory();
- return result;
- }
- }
-
- @Override
- public ColumnPage decode(byte[] input, int offset, int length) throws MemoryException {
- if (srcDataType.equals(targetDataType)) {
- return ColumnPage.decompress(compressor, targetDataType, input, offset, length);
- } else {
- ColumnPage page = ColumnPage.decompress(compressor, targetDataType, input, offset, length);
- return LazyColumnPage.newPage(page, codec);
- }
- }
-
- // encoded value = (10 power of decimal) * ((max value of page) - (page value))
- private PrimitiveCodec codec = new PrimitiveCodec() {
- @Override
- public void encode(int rowId, byte value) {
- // this codec is for floating point type only
- throw new RuntimeException("internal error: " + debugInfo());
- }
-
- @Override
- public void encode(int rowId, short value) {
- // this codec is for floating point type only
- throw new RuntimeException("internal error: " + debugInfo());
- }
-
- @Override
- public void encode(int rowId, int value) {
- // this codec is for floating point type only
- throw new RuntimeException("internal error: " + debugInfo());
- }
-
- @Override
- public void encode(int rowId, long value) {
- // this codec is for floating point type only
- throw new RuntimeException("internal error: " + debugInfo());
- }
-
- @Override
- public void encode(int rowId, float value) {
- double diff = max.subtract(BigDecimal.valueOf(value)).doubleValue();
- switch (targetDataType) {
- case BYTE:
- encodedPage.putByte(rowId, (byte)(Math.round(factor * diff)));
- break;
- case SHORT:
- encodedPage.putShort(rowId, (short)(Math.round(factor * diff)));
- break;
- case INT:
- encodedPage.putInt(rowId, (int)(Math.round(factor * diff)));
- break;
- case LONG:
- encodedPage.putLong(rowId, (long)(Math.round(factor * diff)));
- break;
- default:
- throw new RuntimeException("internal error: " + debugInfo());
- }
- }
-
- @Override
- public void encode(int rowId, double value) {
- double diff = max.subtract(BigDecimal.valueOf(value)).doubleValue();
- switch (targetDataType) {
- case BYTE:
- encodedPage.putByte(rowId, (byte)(Math.round(factor * diff)));
- break;
- case SHORT:
- encodedPage.putShort(rowId, (short)(Math.round(factor * diff)));
- break;
- case INT:
- encodedPage.putInt(rowId, (int)(Math.round(factor * diff)));
- break;
- case LONG:
- encodedPage.putLong(rowId, (long)(Math.round(factor * diff)));
- break;
- default:
- throw new RuntimeException("internal error: " + debugInfo());
- }
- }
-
- @Override
- public long decodeLong(byte value) {
- // this codec is for floating point type only
- throw new RuntimeException("internal error: " + debugInfo());
- }
-
- @Override
- public long decodeLong(short value) {
- // this codec is for floating point type only
- throw new RuntimeException("internal error: " + debugInfo());
- }
-
- @Override
- public long decodeLong(int value) {
- // this codec is for floating point type only
- throw new RuntimeException("internal error: " + debugInfo());
- }
-
- @Override
- public double decodeDouble(byte value) {
- return max.subtract(BigDecimal.valueOf(value / factor)).doubleValue();
- }
-
- @Override
- public double decodeDouble(short value) {
- return max.subtract(BigDecimal.valueOf(value / factor)).doubleValue();
- }
-
- @Override
- public double decodeDouble(int value) {
- return max.subtract(BigDecimal.valueOf(value / factor)).doubleValue();
- }
-
- @Override
- public double decodeDouble(long value) {
- return max.subtract(BigDecimal.valueOf(value / factor)).doubleValue();
- }
-
- @Override
- public double decodeDouble(float value) {
- throw new RuntimeException("internal error: " + debugInfo());
- }
-
- @Override
- public double decodeDouble(double value) {
- throw new RuntimeException("internal error: " + debugInfo());
- }
- };
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/carbondata/blob/434f32dd/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/UpscaleFloatingCodec.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/UpscaleFloatingCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/UpscaleFloatingCodec.java
index 56c4508..73898af 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/UpscaleFloatingCodec.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/UpscaleFloatingCodec.java
@@ -17,6 +17,8 @@
package org.apache.carbondata.core.datastore.page.encoding;
+import java.math.BigDecimal;
+
import org.apache.carbondata.core.datastore.compression.Compressor;
import org.apache.carbondata.core.datastore.page.ColumnPage;
import org.apache.carbondata.core.datastore.page.LazyColumnPage;
@@ -105,16 +107,20 @@ public class UpscaleFloatingCodec extends AdaptiveCompressionCodec {
public void encode(int rowId, float value) {
switch (targetDataType) {
case BYTE:
- encodedPage.putByte(rowId, (byte)(Math.round(factor * value)));
+ encodedPage.putByte(rowId,
+ BigDecimal.valueOf(value).multiply(BigDecimal.valueOf(factor)).byteValue());
break;
case SHORT:
- encodedPage.putShort(rowId, (short)(Math.round(factor * value)));
+ encodedPage.putShort(rowId,
+ BigDecimal.valueOf(value).multiply(BigDecimal.valueOf(factor)).shortValue());
break;
case INT:
- encodedPage.putInt(rowId, (int)(Math.round(factor * value)));
+ encodedPage.putInt(rowId,
+ BigDecimal.valueOf(value).multiply(BigDecimal.valueOf(factor)).intValue());
break;
case LONG:
- encodedPage.putLong(rowId, (long)(Math.round(factor * value)));
+ encodedPage.putLong(rowId,
+ BigDecimal.valueOf(value).multiply(BigDecimal.valueOf(factor)).longValue());
break;
default:
throw new RuntimeException("internal error: " + debugInfo());
@@ -125,16 +131,20 @@ public class UpscaleFloatingCodec extends AdaptiveCompressionCodec {
public void encode(int rowId, double value) {
switch (targetDataType) {
case BYTE:
- encodedPage.putByte(rowId, (byte)(Math.round(factor * value)));
+ encodedPage.putByte(rowId,
+ BigDecimal.valueOf(value).multiply(BigDecimal.valueOf(factor)).byteValue());
break;
case SHORT:
- encodedPage.putShort(rowId, (short)(Math.round(factor * value)));
+ encodedPage.putShort(rowId,
+ BigDecimal.valueOf(value).multiply(BigDecimal.valueOf(factor)).shortValue());
break;
case INT:
- encodedPage.putInt(rowId, (int)(Math.round(factor * value)));
+ encodedPage.putInt(rowId,
+ BigDecimal.valueOf(value).multiply(BigDecimal.valueOf(factor)).intValue());
break;
case LONG:
- encodedPage.putLong(rowId, (long)(Math.round(factor * value)));
+ encodedPage.putLong(rowId,
+ BigDecimal.valueOf(value).multiply(BigDecimal.valueOf(factor)).longValue());
break;
case DOUBLE:
encodedPage.putDouble(rowId, value);
@@ -161,22 +171,22 @@ public class UpscaleFloatingCodec extends AdaptiveCompressionCodec {
@Override
public double decodeDouble(byte value) {
- return value / factor;
+ return BigDecimal.valueOf(value).divide(BigDecimal.valueOf(factor)).doubleValue();
}
@Override
public double decodeDouble(short value) {
- return value / factor;
+ return BigDecimal.valueOf(value).divide(BigDecimal.valueOf(factor)).doubleValue();
}
@Override
public double decodeDouble(int value) {
- return value / factor;
+ return BigDecimal.valueOf(value).divide(BigDecimal.valueOf(factor)).doubleValue();
}
@Override
public double decodeDouble(long value) {
- return value / factor;
+ return BigDecimal.valueOf(value).divide(BigDecimal.valueOf(factor)).doubleValue();
}
@Override
http://git-wip-us.apache.org/repos/asf/carbondata/blob/434f32dd/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/DoubleDataTypeTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/DoubleDataTypeTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/DoubleDataTypeTestCase.scala
new file mode 100644
index 0000000..e72151d
--- /dev/null
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/DoubleDataTypeTestCase.scala
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.integration.spark.testsuite.primitiveTypes
+
+import java.util.Random
+
+import org.apache.spark.sql.{DataFrame, Row, SaveMode}
+import org.apache.spark.sql.common.util.QueryTest
+import org.apache.spark.sql.types._
+import org.scalatest.BeforeAndAfterAll
+
+/**
+ * Test Class for filter query on Double datatypes
+ */
+class DoubleDataTypeTestCase extends QueryTest with BeforeAndAfterAll {
+
+ lazy val df: DataFrame = generateDataFrame
+
+ private def generateDataFrame(): DataFrame = {
+ val r = new Random()
+ val rdd = sqlContext.sparkContext
+ .parallelize(1 to 10, 2)
+ .map { x =>
+ Row(x, "London" + (x % 2), x.toDouble / 13, x.toDouble / 11)
+ }
+
+ val schema = StructType(
+ Seq(
+ StructField("id", IntegerType, nullable = false),
+ StructField("city", StringType, nullable = false),
+ StructField("m1", DoubleType, nullable = false),
+ StructField("m2", DoubleType, nullable = false)
+ )
+ )
+
+ sqlContext.createDataFrame(rdd, schema)
+ }
+
+ override def beforeAll {
+ sql("drop table if exists doubleTypeCarbonTable")
+ sql("drop table if exists doubleTypeHiveTable")
+
+ df.write
+ .format("carbondata")
+ .option("tableName", "doubleTypeCarbonTable")
+ .option("tempCSV", "false")
+ .option("single_pass", "true")
+ .option("dictionary_exclude", "city")
+ .option("table_blocksize", "32")
+ .mode(SaveMode.Overwrite)
+ .save()
+
+ df.write
+ .mode(SaveMode.Overwrite)
+ .saveAsTable("doubleTypeHiveTable")
+
+ }
+
+ test("detail query") {
+ checkAnswer(sql("select * from doubleTypeCarbonTable order by id"),
+ sql("select * from doubleTypeHiveTable order by id"))
+
+ }
+
+// test("agg query") {
+// checkAnswer(sql("select city, sum(m1), avg(m1), count(m1), max(m1), min(m1) from doubleTypeCarbonTable group by city"),
+// sql("select city, sum(m1), avg(m1), count(m1), max(m1), min(m1) from doubleTypeHiveTable group by city"))
+//
+// checkAnswer(sql("select city, sum(m2), avg(m2), count(m2), max(m2), min(m2) from doubleTypeCarbonTable group by city"),
+// sql("select city, sum(m2), avg(m2), count(m2), max(m2), min(m2) from doubleTypeHiveTable group by city"))
+// }
+
+ override def afterAll {
+ sql("drop table if exists doubleTypeCarbonTable")
+ sql("drop table if exists doubleTypeHiveTable")
+ }
+}
\ No newline at end of file
[09/12] carbondata git commit: fixed codec for UpscaleFloatingCodec
Posted by ja...@apache.org.
fixed codec for UpscaleFloatingCodec
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1d8254b8
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1d8254b8
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1d8254b8
Branch: refs/heads/encoding_override
Commit: 1d8254b85c7a0613e2fc2698d2a642fc52b6335e
Parents: 3dec25b
Author: kunal642 <ku...@knoldus.in>
Authored: Sat Jun 17 13:37:08 2017 +0530
Committer: jackylk <ja...@huawei.com>
Committed: Mon Jun 26 21:08:53 2017 +0800
----------------------------------------------------------------------
.../datastore/page/encoding/UpscaleFloatingCodec.java | 2 +-
.../datastore/page/statistics/ColumnPageStatsVO.java | 2 +-
.../spark-common-test/src/test/resources/double.csv | 3 +++
.../TestLoadDataWithHiveSyntaxDefaultFormat.scala | 11 +++++++++++
4 files changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d8254b8/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/UpscaleFloatingCodec.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/UpscaleFloatingCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/UpscaleFloatingCodec.java
index 19c6bd7..56c4508 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/UpscaleFloatingCodec.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/UpscaleFloatingCodec.java
@@ -137,7 +137,7 @@ public class UpscaleFloatingCodec extends AdaptiveCompressionCodec {
encodedPage.putLong(rowId, (long)(Math.round(factor * value)));
break;
case DOUBLE:
- encodedPage.putDouble(rowId, (Math.round(factor * value)));
+ encodedPage.putDouble(rowId, value);
break;
default:
throw new RuntimeException("internal error: " + debugInfo());
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d8254b8/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/ColumnPageStatsVO.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/ColumnPageStatsVO.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/ColumnPageStatsVO.java
index 642e6b2..058699a 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/ColumnPageStatsVO.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/ColumnPageStatsVO.java
@@ -96,7 +96,7 @@ public class ColumnPageStatsVO {
case DOUBLE:
max = ((double) max > (double) value) ? max : value;
min = ((double) min < (double) value) ? min : value;
- int num = getDecimalCount((double) value);
+ int num = Math.abs(getDecimalCount((double) value));
decimal = decimal > num ? decimal : num;
nonExistValue = (double) min - 1;
break;
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d8254b8/integration/spark-common-test/src/test/resources/double.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/double.csv b/integration/spark-common-test/src/test/resources/double.csv
new file mode 100644
index 0000000..267392e
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/double.csv
@@ -0,0 +1,3 @@
+empno, salary
+'abc', 775678765456789098765432.789
+'def', 876567898743456785232.44431
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d8254b8/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxDefaultFormat.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxDefaultFormat.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxDefaultFormat.scala
index f8e11de..b8713e5 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxDefaultFormat.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxDefaultFormat.scala
@@ -670,6 +670,17 @@ class TestLoadDataWithHiveSyntaxDefaultFormat extends QueryTest with BeforeAndAf
Row("~carbon,")))
}
+ test("test data load with double datatype") {
+ sql("drop table if exists double_test")
+ sql(
+ "CREATE table double_test (empno string, salary double) STORED BY 'carbondata' TBLPROPERTIES" +
+ "('DICTIONARY_EXCLUDE'='empno')"
+ )
+ sql(
+ s"load data local inpath '$resourcesPath/double.csv' into table double_test options" +
+ "('FILEHEADER'='empno,salary')")
+ checkAnswer(sql("select salary from double_test limit 1"),Row(7.756787654567891E23))
+ }
override def afterAll {
sql("drop table if exists escapechar1")
[10/12] carbondata git commit: [CARBONDATA-1188] fixed codec for
UpscaleFloatingCodec This closes #1053
Posted by ja...@apache.org.
[CARBONDATA-1188] fixed codec for UpscaleFloatingCodec This closes #1053
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/15acd9d7
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/15acd9d7
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/15acd9d7
Branch: refs/heads/encoding_override
Commit: 15acd9d71b45d863f4672f7725c640ce344c214b
Parents: 3dec25b 1d8254b
Author: jackylk <ja...@huawei.com>
Authored: Mon Jun 26 21:09:17 2017 +0800
Committer: jackylk <ja...@huawei.com>
Committed: Mon Jun 26 21:09:17 2017 +0800
----------------------------------------------------------------------
.../datastore/page/encoding/UpscaleFloatingCodec.java | 2 +-
.../datastore/page/statistics/ColumnPageStatsVO.java | 2 +-
.../spark-common-test/src/test/resources/double.csv | 3 +++
.../TestLoadDataWithHiveSyntaxDefaultFormat.scala | 11 +++++++++++
4 files changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
[06/12] carbondata git commit: [CARBONDATA-1197] Update related docs
which still use incubating such as presto integration This closes #1066
Posted by ja...@apache.org.
[CARBONDATA-1197] Update related docs which still use incubating such as presto integration This closes #1066
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/22514b27
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/22514b27
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/22514b27
Branch: refs/heads/encoding_override
Commit: 22514b2719f9306ca9d7204442299eb4e9c7dd8e
Parents: 8803da2 9f855f0
Author: chenliang613 <ch...@apache.org>
Authored: Sat Jun 24 17:44:48 2017 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Sat Jun 24 17:44:48 2017 +0800
----------------------------------------------------------------------
docs/installation-guide.md | 6 +++---
integration/presto/README.md | 8 ++++----
2 files changed, 7 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
[12/12] carbondata git commit: [CARBONDATA-1228] the query result of
double is not correct This closes #1091
Posted by ja...@apache.org.
[CARBONDATA-1228] the query result of double is not correct This closes #1091
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3ecb3ec5
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3ecb3ec5
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3ecb3ec5
Branch: refs/heads/encoding_override
Commit: 3ecb3ec58b9156876ae6b9f4229ade3399f8d84c
Parents: 15acd9d 434f32d
Author: jackylk <ja...@huawei.com>
Authored: Mon Jun 26 22:58:38 2017 +0800
Committer: jackylk <ja...@huawei.com>
Committed: Mon Jun 26 22:58:38 2017 +0800
----------------------------------------------------------------------
.../page/encoding/DefaultEncodingStrategy.java | 11 +-
.../encoding/UpscaleDeltaFloatingCodec.java | 198 -------------------
.../page/encoding/UpscaleFloatingCodec.java | 34 ++--
.../primitiveTypes/DoubleDataTypeTestCase.scala | 91 +++++++++
4 files changed, 115 insertions(+), 219 deletions(-)
----------------------------------------------------------------------
[04/12] carbondata git commit: Correct some invalid steps in
hive-guide.md This closes #1090
Posted by ja...@apache.org.
Correct some invalid steps in hive-guide.md This closes #1090
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8803da23
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8803da23
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8803da23
Branch: refs/heads/encoding_override
Commit: 8803da23f7b3e7a5053f4842d0e3cc0e1e0c34ae
Parents: 4a6f57e 22b3477
Author: chenliang613 <ch...@apache.org>
Authored: Sat Jun 24 17:32:11 2017 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Sat Jun 24 17:32:11 2017 +0800
----------------------------------------------------------------------
integration/hive/hive-guide.md | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
[02/12] carbondata git commit: [CARBONDATA-1223] Fixing empty file
creation in batch sort loading This closes #1087
Posted by ja...@apache.org.
[CARBONDATA-1223] Fixing empty file creation in batch sort loading This closes #1087
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/4a6f57eb
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/4a6f57eb
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/4a6f57eb
Branch: refs/heads/encoding_override
Commit: 4a6f57ebfdc64d04d8b2a5df9aea0dc340a30ba0
Parents: 30ef14e 0205fa6
Author: ravipesala <ra...@gmail.com>
Authored: Sat Jun 24 08:35:59 2017 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Sat Jun 24 08:35:59 2017 +0530
----------------------------------------------------------------------
.../UnsafeBatchParallelReadMergeSorterImpl.java | 16 +++++++++---
.../UnsafeSingleThreadFinalSortFilesMerger.java | 26 --------------------
.../steps/DataWriterBatchProcessorStepImpl.java | 18 ++++++++------
3 files changed, 23 insertions(+), 37 deletions(-)
----------------------------------------------------------------------