Posted to commits@carbondata.apache.org by ja...@apache.org on 2017/07/12 15:04:37 UTC
[01/50] [abbrv] carbondata git commit: fix compaction bug for partition table [Forced Update!]
Repository: carbondata
Updated Branches:
refs/heads/datamap 6d71d9c47 -> b385d14b4 (forced update)
fix compaction bug for partition table
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/aecf496e
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/aecf496e
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/aecf496e
Branch: refs/heads/datamap
Commit: aecf496eda9c1b3ae854a16d4dcbfb1c7e3701e8
Parents: c8f742d
Author: QiangCai <da...@gmail.com>
Authored: Wed Jun 7 11:51:08 2017 +0800
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Thu Jun 29 11:56:31 2017 +0530
----------------------------------------------------------------------
.../core/metadata/schema/table/CarbonTable.java | 4 +
.../TestCompactionForPartitionTable.scala | 84 ++++++++++++++++++++
.../carbondata/spark/rdd/CarbonMergerRDD.scala | 27 +++++--
.../spark/rdd/CarbonSparkPartition.scala | 3 +-
4 files changed, 109 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/aecf496e/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
index 429b1df..ae97262 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
@@ -593,6 +593,10 @@ public class CarbonTable implements Serializable {
return tablePartitionMap.get(tableName);
}
+ public boolean isPartitionTable() {
+ return null != tablePartitionMap.get(getFactTableName());
+ }
+
public PartitionStatistic getPartitionStatistic() {
return partitionStatistic;
}
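For illustration, the new check is just a presence test against the table-to-partition map; a minimal stand-alone sketch of the same idea in Scala (hypothetical names, not the actual CarbonData classes):

// a table is a partition table iff its fact table has an entry in the map
val tablePartitionMap = Map("sales_fact" -> "LIST(c: 1, 2)")

def isPartitionTable(factTableName: String): Boolean =
  tablePartitionMap.contains(factTableName)

isPartitionTable("sales_fact")   // true
isPartitionTable("plain_table")  // false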
http://git-wip-us.apache.org/repos/asf/carbondata/blob/aecf496e/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestCompactionForPartitionTable.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestCompactionForPartitionTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestCompactionForPartitionTable.scala
new file mode 100644
index 0000000..ae8387e
--- /dev/null
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestCompactionForPartitionTable.scala
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.spark.testsuite.partition
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.common.util.QueryTest
+import org.apache.spark.sql.test.TestQueryExecutor
+import org.scalatest.BeforeAndAfterAll
+
+class TestCompactionForPartitionTable extends QueryTest with BeforeAndAfterAll {
+
+ override def beforeAll {
+ dropTable
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
+ sql(
+ """
+ | CREATE TABLE originTable (empno int, empname String, designation String, doj Timestamp,
+ | workgroupcategory int, workgroupcategoryname String, deptno int, deptname String,
+ | projectcode int, projectjoindate Timestamp, projectenddate Timestamp, attendance int,
+ | utilization int, salary int)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+
+ sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE originTable OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '"')""")
+ }
+
+ test("minor compaction") {
+ sql("create table part_minor_compact(a String, b int) partitioned by (c int) stored by 'carbondata' tblproperties('PARTITION_TYPE'='LIST','LIST_INFO'='1,2')")
+ sql("insert into part_minor_compact select 'a', 2, 3 from originTable limit 1")
+ sql("insert into part_minor_compact select 'b', 3, 4 from originTable limit 1")
+ sql("insert into part_minor_compact select 'c', 4, 5 from originTable limit 1")
+ sql("insert into part_minor_compact select 'd', 1, 2 from originTable limit 1")
+
+ checkAnswer(sql("select * from part_minor_compact where c = 4"), Seq(Row("b", 3, 4)))
+
+ sql("alter table part_minor_compact compact 'minor'")
+
+ checkAnswer(sql("select * from part_minor_compact where c = 4"), Seq(Row("b", 3, 4)))
+ }
+
+ test("major compaction") {
+ sql("create table part_major_compact(a String, b int) partitioned by (c int) stored by 'carbondata' tblproperties('PARTITION_TYPE'='LIST','LIST_INFO'='1,2')")
+ sql("insert into part_major_compact select 'a', 2, 3 from originTable limit 1")
+ sql("insert into part_major_compact select 'b', 3, 4 from originTable limit 1")
+ sql("insert into part_major_compact select 'c', 4, 5 from originTable limit 1")
+ sql("insert into part_major_compact select 'd', 1, 2 from originTable limit 1")
+
+ checkAnswer(sql("select * from part_major_compact where c = 4"), Seq(Row("b", 3, 4)))
+
+ sql("alter table part_major_compact compact 'major'")
+
+ checkAnswer(sql("select * from part_major_compact where c = 4"), Seq(Row("b", 3, 4)))
+ }
+
+ override def afterAll = {
+ dropTable
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, TestQueryExecutor.timestampFormat)
+ }
+
+ def dropTable = {
+ sql("drop table if exists part_minor_compact")
+ sql("drop table if exists part_major_compact")
+ sql("drop table if exists originTable")
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/aecf496e/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala
index 908043a..815dba3 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala
@@ -77,8 +77,13 @@ class CarbonMergerRDD[K, V](
override def internalCompute(theSplit: Partition, context: TaskContext): Iterator[(K, V)] = {
val LOGGER = LogServiceFactory.getLogService(this.getClass.getName)
val iter = new Iterator[(K, V)] {
-
- carbonLoadModel.setTaskNo(String.valueOf(theSplit.index))
+ val carbonTable = carbonLoadModel.getCarbonDataLoadSchema.getCarbonTable
+ val carbonSparkPartition = theSplit.asInstanceOf[CarbonSparkPartition]
+ if (carbonTable.isPartitionTable) {
+ carbonLoadModel.setTaskNo(String.valueOf(carbonSparkPartition.partitionId))
+ } else {
+ carbonLoadModel.setTaskNo(String.valueOf(theSplit.index))
+ }
val tempLocationKey = CarbonDataProcessorUtil
.getTempStoreLocationKey(carbonLoadModel.getDatabaseName,
carbonLoadModel.getTableName,
@@ -108,7 +113,7 @@ class CarbonMergerRDD[K, V](
var mergeNumber = ""
var exec: CarbonCompactionExecutor = null
try {
- val carbonSparkPartition = theSplit.asInstanceOf[CarbonSparkPartition]
+
// sorting the table block info List.
val splitList = carbonSparkPartition.split.value.getAllSplits
@@ -140,7 +145,6 @@ class CarbonMergerRDD[K, V](
.toList
}
- val carbonTable = carbonLoadModel.getCarbonDataLoadSchema.getCarbonTable
// get destination segment properties as sent from driver which is of last segment.
val segmentProperties = new SegmentProperties(
carbonMergerMapping.maxSegmentColumnSchemaList.asJava,
@@ -266,7 +270,8 @@ class CarbonMergerRDD[K, V](
job.getConfiguration.set("query.id", queryId)
var defaultParallelism = sparkContext.defaultParallelism
val result = new java.util.ArrayList[Partition](defaultParallelism)
- var partitionNo = 0
+ var taskPartitionNo = 0
+ var carbonPartitionId = 0
var columnSize = 0
var noOfBlocks = 0
@@ -398,6 +403,7 @@ class CarbonMergerRDD[K, V](
logInfo("Time taken to wait for executor allocation is =" + ((30 - maxTimes) * 500) + "millis")
defaultParallelism = sparkContext.defaultParallelism
+ val isPartitionTable = carbonLoadModel.getCarbonDataLoadSchema.getCarbonTable.isPartitionTable
// Create Spark Partition for each task and assign blocks
nodeBlockMap.asScala.foreach { case (nodeName, splitList) =>
val taskSplitList = new java.util.ArrayList[NodeInfo](0)
@@ -410,11 +416,16 @@ class CarbonMergerRDD[K, V](
NodeInfo(splitsPerNode.getTaskId, splitsPerNode.getCarbonInputSplitList.size()))
if (blockletCount != 0) {
+ val taskInfo = splitInfo.asInstanceOf[CarbonInputSplitTaskInfo]
val multiBlockSplit = new CarbonMultiBlockSplit(absoluteTableIdentifier,
- splitInfo.asInstanceOf[CarbonInputSplitTaskInfo].getCarbonInputSplitList,
+ taskInfo.getCarbonInputSplitList,
Array(nodeName))
- result.add(new CarbonSparkPartition(id, partitionNo, multiBlockSplit))
- partitionNo += 1
+ if (isPartitionTable) {
+ carbonPartitionId = Integer.parseInt(taskInfo.getTaskId)
+ }
+ result.add(
+ new CarbonSparkPartition(id, taskPartitionNo, multiBlockSplit, carbonPartitionId))
+ taskPartitionNo += 1
}
}
}
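The core of the fix, as a stand-alone hedged sketch (simplified names, not the actual CarbonData classes): for a partition table the compaction task number must follow the Carbon partition id so that merged files are written back into the correct partition; otherwise the plain Spark split index is used.

final case class MergerSplit(index: Int, carbonPartitionId: Int)

// choose the task number the merged segment is written under
def taskNoFor(isPartitionTable: Boolean, split: MergerSplit): String =
  if (isPartitionTable) split.carbonPartitionId.toString
  else split.index.toString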
http://git-wip-us.apache.org/repos/asf/carbondata/blob/aecf496e/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonSparkPartition.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonSparkPartition.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonSparkPartition.scala
index 82a471f..cf539ba 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonSparkPartition.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonSparkPartition.scala
@@ -24,7 +24,8 @@ import org.apache.carbondata.hadoop.CarbonMultiBlockSplit
class CarbonSparkPartition(
val rddId: Int,
val idx: Int,
- @transient val multiBlockSplit: CarbonMultiBlockSplit)
+ @transient val multiBlockSplit: CarbonMultiBlockSplit,
+ val partitionId: Int = 0)
extends Partition {
val split = new SerializableWritable[CarbonMultiBlockSplit](multiBlockSplit)
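As a usage note, the new partitionId parameter defaults to 0, so existing call sites for non-partitioned tables keep compiling unchanged; a minimal sketch of the pattern (illustrative class name, not the real one):

class PartitionLike(val rddId: Int, val idx: Int, val partitionId: Int = 0)

val legacy      = new PartitionLike(rddId = 1, idx = 0)                  // partitionId = 0
val partitioned = new PartitionLike(rddId = 1, idx = 0, partitionId = 3) // partition table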
[48/50] [abbrv] carbondata git commit: [CARBONDATA-1232] Datamap implementation for Blocklet
Posted by ja...@apache.org.
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
----------------------------------------------------------------------
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
index e4d3ba5..8aa18d5 100755
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
@@ -17,14 +17,31 @@
package org.apache.carbondata.presto.impl;
-import com.facebook.presto.spi.SchemaTableName;
-import com.facebook.presto.spi.classloader.ThreadContextClassLoader;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableSet;
-import com.google.inject.Inject;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.UUID;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
import org.apache.carbondata.core.constants.CarbonCommonConstants;
-import org.apache.carbondata.core.datastore.*;
-import org.apache.carbondata.core.datastore.block.*;
+import org.apache.carbondata.core.datastore.DataRefNode;
+import org.apache.carbondata.core.datastore.DataRefNodeFinder;
+import org.apache.carbondata.core.datastore.IndexKey;
+import org.apache.carbondata.core.datastore.SegmentTaskIndexStore;
+import org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier;
+import org.apache.carbondata.core.datastore.block.AbstractIndex;
+import org.apache.carbondata.core.datastore.block.BlockletInfos;
+import org.apache.carbondata.core.datastore.block.SegmentProperties;
+import org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper;
+import org.apache.carbondata.core.datastore.block.TableBlockInfo;
import org.apache.carbondata.core.datastore.exception.IndexBuilderException;
import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.impl.FileFactory;
@@ -52,18 +69,24 @@ import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.path.CarbonTablePath;
import org.apache.carbondata.hadoop.CacheClient;
import org.apache.carbondata.hadoop.util.CarbonInputFormatUtil;
+
+import com.facebook.presto.spi.SchemaTableName;
+import com.facebook.presto.spi.classloader.ThreadContextClassLoader;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import com.google.inject.Inject;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.*;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.thrift.TBase;
-import java.io.IOException;
-import java.util.*;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
import static java.util.Objects.requireNonNull;
import com.facebook.presto.spi.TableNotFoundException;
@@ -392,8 +415,9 @@ public class CarbonTableReader {
TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo();
if (IUDTable) {
- if (CarbonUtil.isInvalidTableBlock(tableBlockInfo, invalidBlockVOForSegmentId,
- updateStatusManager)) {
+ if (CarbonUtil
+ .isInvalidTableBlock(tableBlockInfo.getSegmentId(), tableBlockInfo.getFilePath(),
+ invalidBlockVOForSegmentId, updateStatusManager)) {
continue;
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonIUDMergerRDD.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonIUDMergerRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonIUDMergerRDD.scala
index 277005b..da0d082 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonIUDMergerRDD.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonIUDMergerRDD.scala
@@ -29,7 +29,8 @@ import org.apache.spark.sql.execution.command.CarbonMergerMapping
import org.apache.carbondata.core.datastore.block.{Distributable, TableBlockInfo}
import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier}
-import org.apache.carbondata.hadoop.{CarbonInputFormat, CarbonInputSplit, CarbonMultiBlockSplit}
+import org.apache.carbondata.hadoop.{CarbonInputSplit, CarbonMultiBlockSplit}
+import org.apache.carbondata.hadoop.api.CarbonTableInputFormat
import org.apache.carbondata.hadoop.util.CarbonInputFormatUtil
import org.apache.carbondata.processing.merger.CarbonDataMergerUtil
import org.apache.carbondata.processing.model.CarbonLoadModel
@@ -71,7 +72,7 @@ class CarbonIUDMergerRDD[K, V](
var blocksOfLastSegment: List[TableBlockInfo] = null
- CarbonInputFormat.setSegmentsToAccess(
+ CarbonTableInputFormat.setSegmentsToAccess(
job.getConfiguration, carbonMergerMapping.validSegments.toList.asJava)
// get splits
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala
index 815dba3..a1d5262 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala
@@ -43,8 +43,8 @@ import org.apache.carbondata.core.mutate.UpdateVO
import org.apache.carbondata.core.scan.result.iterator.RawResultIterator
import org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager
import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
-import org.apache.carbondata.core.util.path.CarbonTablePath
-import org.apache.carbondata.hadoop.{CarbonInputFormat, CarbonInputSplit, CarbonMultiBlockSplit}
+import org.apache.carbondata.hadoop.{CarbonInputSplit, CarbonMultiBlockSplit}
+import org.apache.carbondata.hadoop.api.CarbonTableInputFormat
import org.apache.carbondata.hadoop.util.{CarbonInputFormatUtil, CarbonInputSplitTaskInfo}
import org.apache.carbondata.processing.merger._
import org.apache.carbondata.processing.model.CarbonLoadModel
@@ -291,7 +291,7 @@ class CarbonMergerRDD[K, V](
for (eachSeg <- carbonMergerMapping.validSegments) {
// map for keeping the relation of a task and its blocks.
- job.getConfiguration.set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, eachSeg)
+ job.getConfiguration.set(CarbonTableInputFormat.INPUT_SEGMENT_NUMBERS, eachSeg)
if (updateStatusManager.getUpdateStatusDetails.length != 0) {
updateDetails = updateStatusManager.getInvalidTimestampRange(eachSeg)
@@ -313,7 +313,8 @@ class CarbonMergerRDD[K, V](
updateStatusManager.getDeleteDeltaFilePath(entry.getPath.toString)
)
((!updated) || ((updated) && (!CarbonUtil
- .isInvalidTableBlock(blockInfo, updateDetails, updateStatusManager))))
+ .isInvalidTableBlock(blockInfo.getSegmentId, blockInfo.getFilePath,
+ updateDetails, updateStatusManager))))
})
}
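One small observation on the predicate above: `(!updated) || (updated && !invalid)` is logically equivalent to `!updated || !invalid`, i.e. a block is dropped only when it was both updated and found invalid. A minimal sketch verifying the equivalence:

// equivalent forms of the block-retention predicate shown in the diff
def keepBlockVerbose(updated: Boolean, invalid: Boolean): Boolean =
  (!updated) || (updated && !invalid)

def keepBlock(updated: Boolean, invalid: Boolean): Boolean =
  !updated || !invalid

// the two agree on all four input combinations
val allAgree = (for {
  u <- Seq(true, false); i <- Seq(true, false)
} yield keepBlockVerbose(u, i) == keepBlock(u, i)).forall(identity)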
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala
index 3868342..3780716 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala
@@ -40,6 +40,7 @@ import org.apache.carbondata.core.scan.model.QueryModel
import org.apache.carbondata.core.stats.{QueryStatistic, QueryStatisticsConstants, QueryStatisticsRecorder}
import org.apache.carbondata.core.util.{CarbonProperties, CarbonTimeStatisticsFactory}
import org.apache.carbondata.hadoop._
+import org.apache.carbondata.hadoop.api.CarbonTableInputFormat
import org.apache.carbondata.spark.load.CarbonLoaderUtil
@@ -245,21 +246,22 @@ class CarbonScanRDD(
iterator.asInstanceOf[Iterator[InternalRow]]
}
- private def prepareInputFormatForDriver(conf: Configuration): CarbonInputFormat[Object] = {
- CarbonInputFormat.setCarbonTable(conf, carbonTable)
+ private def prepareInputFormatForDriver(conf: Configuration): CarbonTableInputFormat[Object] = {
+ CarbonTableInputFormat.setCarbonTable(conf, carbonTable)
createInputFormat(conf)
}
- private def prepareInputFormatForExecutor(conf: Configuration): CarbonInputFormat[Object] = {
- CarbonInputFormat.setCarbonReadSupport(conf, readSupport)
+ private def prepareInputFormatForExecutor(conf: Configuration): CarbonTableInputFormat[Object] = {
+ CarbonTableInputFormat.setCarbonReadSupport(conf, readSupport)
createInputFormat(conf)
}
- private def createInputFormat(conf: Configuration): CarbonInputFormat[Object] = {
- val format = new CarbonInputFormat[Object]
- CarbonInputFormat.setTablePath(conf, identifier.appendWithLocalPrefix(identifier.getTablePath))
- CarbonInputFormat.setFilterPredicates(conf, filterExpression)
- CarbonInputFormat.setColumnProjection(conf, columnProjection)
+ private def createInputFormat(conf: Configuration): CarbonTableInputFormat[Object] = {
+ val format = new CarbonTableInputFormat[Object]
+ CarbonTableInputFormat.setTablePath(conf,
+ identifier.appendWithLocalPrefix(identifier.getTablePath))
+ CarbonTableInputFormat.setFilterPredicates(conf, filterExpression)
+ CarbonTableInputFormat.setColumnProjection(conf, columnProjection)
format
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala
index 2ca3b8c..4950227 100644
--- a/integration/spark/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala
+++ b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala
@@ -26,7 +26,7 @@ import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
-import org.apache.carbondata.hadoop.CarbonInputFormat
+import org.apache.carbondata.hadoop.api.CarbonTableInputFormat
/**
@@ -38,8 +38,8 @@ object QueryPlanUtil {
* createCarbonInputFormat from query model
*/
def createCarbonInputFormat(absoluteTableIdentifier: AbsoluteTableIdentifier) :
- (CarbonInputFormat[Array[Object]], Job) = {
- val carbonInputFormat = new CarbonInputFormat[Array[Object]]()
+ (CarbonTableInputFormat[Array[Object]], Job) = {
+ val carbonInputFormat = new CarbonTableInputFormat[Array[Object]]()
val jobConf: JobConf = new JobConf(new Configuration)
val job: Job = new Job(jobConf)
FileInputFormat.addInputPath(job, new Path(absoluteTableIdentifier.getTablePath))
@@ -47,8 +47,8 @@ object QueryPlanUtil {
}
def createCarbonInputFormat[V: ClassTag](absoluteTableIdentifier: AbsoluteTableIdentifier,
- conf: Configuration) : CarbonInputFormat[V] = {
- val carbonInputFormat = new CarbonInputFormat[V]()
+ conf: Configuration) : CarbonTableInputFormat[V] = {
+ val carbonInputFormat = new CarbonTableInputFormat[V]()
val job: Job = new Job(conf)
FileInputFormat.addInputPath(job, new Path(absoluteTableIdentifier.getTablePath))
carbonInputFormat
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala
index 2fc93e6..ba89d51 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala
@@ -38,7 +38,8 @@ import org.apache.spark.util.SerializableConfiguration
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
import org.apache.carbondata.core.scan.expression.logical.AndExpression
import org.apache.carbondata.core.util.path.CarbonTablePath
-import org.apache.carbondata.hadoop.{CarbonInputFormat, CarbonInputSplit, CarbonProjection}
+import org.apache.carbondata.hadoop.{CarbonInputSplit, CarbonProjection}
+import org.apache.carbondata.hadoop.api.CarbonTableInputFormat
import org.apache.carbondata.hadoop.util.{CarbonInputFormatUtil, SchemaReader}
import org.apache.carbondata.processing.merger.TableMeta
import org.apache.carbondata.spark.{CarbonFilters, CarbonOption}
@@ -89,16 +90,17 @@ private[sql] case class CarbonDatasourceHadoopRelation(
filters.flatMap { filter =>
CarbonFilters.createCarbonFilter(dataSchema, filter)
}.reduceOption(new AndExpression(_, _))
- .foreach(CarbonInputFormat.setFilterPredicates(conf, _))
+ .foreach(CarbonTableInputFormat.setFilterPredicates(conf, _))
val projection = new CarbonProjection
requiredColumns.foreach(projection.addColumn)
- CarbonInputFormat.setColumnProjection(conf, projection)
- CarbonInputFormat.setCarbonReadSupport(conf, classOf[SparkRowReadSupportImpl])
+ CarbonTableInputFormat.setColumnProjection(conf, projection)
+ CarbonTableInputFormat.setCarbonReadSupport(conf, classOf[SparkRowReadSupportImpl])
+
new CarbonHadoopFSRDD[Row](sqlContext.sparkContext,
new SerializableConfiguration(conf),
absIdentifier,
- classOf[CarbonInputFormat[Row]],
+ classOf[CarbonTableInputFormat[Row]],
classOf[Row]
)
}
@@ -118,7 +120,7 @@ class CarbonHadoopFSRDD[V: ClassTag](
@transient sc: SparkContext,
conf: SerializableConfiguration,
identifier: AbsoluteTableIdentifier,
- inputFormatClass: Class[_ <: CarbonInputFormat[V]],
+ inputFormatClass: Class[_ <: CarbonTableInputFormat[V]],
valueClass: Class[V])
extends CarbonRDD[V](sc, Nil) with SparkHadoopMapReduceUtil {
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/IUDCommands.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/IUDCommands.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/IUDCommands.scala
index a292cde..c38f0e1 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/IUDCommands.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/IUDCommands.scala
@@ -133,13 +133,6 @@ private[sql] case class ProjectForUpdateCommand(
override def run(sqlContext: SQLContext): Seq[Row] = {
-
- // sqlContext.sparkContext.setLocalProperty(org.apache.spark.sql.execution.SQLExecution
- // .EXECUTION_ID_KEY, null)
- // DataFrame(sqlContext, plan).show(truncate = false)
- // return Seq.empty
-
-
val res = plan find {
case relation: LogicalRelation if (relation.relation
.isInstanceOf[CarbonDatasourceRelation]) =>
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/integration/spark2/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala b/integration/spark2/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala
index 70c7caf..e0a8b58 100644
--- a/integration/spark2/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala
+++ b/integration/spark2/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala
@@ -26,7 +26,7 @@ import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
-import org.apache.carbondata.hadoop.CarbonInputFormat
+import org.apache.carbondata.hadoop.api.CarbonTableInputFormat
/**
* All the utility functions for carbon plan creation
@@ -37,8 +37,8 @@ object QueryPlanUtil {
* createCarbonInputFormat from query model
*/
def createCarbonInputFormat(absoluteTableIdentifier: AbsoluteTableIdentifier) :
- (CarbonInputFormat[Array[Object]], Job) = {
- val carbonInputFormat = new CarbonInputFormat[Array[Object]]()
+ (CarbonTableInputFormat[Array[Object]], Job) = {
+ val carbonInputFormat = new CarbonTableInputFormat[Array[Object]]()
val jobConf: JobConf = new JobConf(new Configuration)
val job: Job = new Job(jobConf)
FileInputFormat.addInputPath(job, new Path(absoluteTableIdentifier.getTablePath))
@@ -46,8 +46,8 @@ object QueryPlanUtil {
}
def createCarbonInputFormat[V: ClassTag](absoluteTableIdentifier: AbsoluteTableIdentifier,
- conf: Configuration) : CarbonInputFormat[V] = {
- val carbonInputFormat = new CarbonInputFormat[V]()
+ conf: Configuration) : CarbonTableInputFormat[V] = {
+ val carbonInputFormat = new CarbonTableInputFormat[V]()
val job: Job = new Job(conf)
FileInputFormat.addInputPath(job, new Path(absoluteTableIdentifier.getTablePath))
carbonInputFormat
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala
index d28044f..3b9c771 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala
@@ -30,13 +30,10 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
import org.apache.carbondata.core.scan.expression.Expression
import org.apache.carbondata.core.scan.expression.logical.AndExpression
-import org.apache.carbondata.core.util.{CarbonSessionInfo, SessionParams, ThreadLocalSessionInfo}
+import org.apache.carbondata.core.util.{CarbonSessionInfo, ThreadLocalSessionInfo}
import org.apache.carbondata.hadoop.CarbonProjection
-import org.apache.carbondata.hadoop.util.SchemaReader
-import org.apache.carbondata.processing.merger.TableMeta
import org.apache.carbondata.spark.CarbonFilters
import org.apache.carbondata.spark.rdd.CarbonScanRDD
-import org.apache.carbondata.spark.util.CarbonSparkUtil
case class CarbonDatasourceHadoopRelation(
sparkSession: SparkSession,
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonMetastore.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonMetastore.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonMetastore.scala
index 04a94ce..9fde546 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonMetastore.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/hive/CarbonMetastore.scala
@@ -40,7 +40,8 @@ import org.apache.carbondata.core.datastore.filesystem.CarbonFile
import org.apache.carbondata.core.datastore.impl.FileFactory
import org.apache.carbondata.core.datastore.impl.FileFactory.FileType
import org.apache.carbondata.core.fileoperations.FileWriteOperation
-import org.apache.carbondata.core.metadata.{CarbonMetadata, CarbonTableIdentifier}
+import org.apache.carbondata.core.indexstore.{DataMapStoreManager, DataMapType, TableDataMap}
+import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonMetadata, CarbonTableIdentifier}
import org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl
import org.apache.carbondata.core.metadata.datatype.DataType.DECIMAL
import org.apache.carbondata.core.metadata.schema.table.CarbonTable
@@ -511,8 +512,10 @@ class CarbonMetastore(conf: RuntimeConfig, val storePath: String) {
val dbName = tableIdentifier.database.get
val tableName = tableIdentifier.table
- val metadataFilePath = CarbonStorePath.getCarbonTablePath(tableStorePath,
- new CarbonTableIdentifier(dbName, tableName, "")).getMetadataDirectoryPath
+ val tablePath = CarbonStorePath.getCarbonTablePath(tableStorePath,
+ new CarbonTableIdentifier(dbName, tableName, ""))
+ val metadataFilePath = tablePath.getMetadataDirectoryPath
+ val identifier = AbsoluteTableIdentifier.fromTablePath(tablePath.getPath)
val fileType = FileFactory.getFileType(metadataFilePath)
@@ -528,6 +531,7 @@ class CarbonMetastore(conf: RuntimeConfig, val storePath: String) {
case Some(tableMeta) =>
metadata.tablesMeta -= tableMeta
CarbonMetadata.getInstance.removeTable(dbName + "_" + tableName)
+ DataMapStoreManager.getInstance.clearDataMap(identifier, "blocklet")
updateSchemasUpdatedTime(touchSchemaFileSystemTime(dbName, tableName))
case None =>
LOGGER.info(s"Metadata does not contain entry for table $tableName in database $dbName")
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionUtil.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionUtil.java b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionUtil.java
index 8cdcd26..99bfd44 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionUtil.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionUtil.java
@@ -18,6 +18,7 @@ package org.apache.carbondata.processing.merger;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -379,4 +380,35 @@ public class CarbonCompactionUtil {
}
return restructuredBlockExists;
}
+
+ /**
+ * This method checks for any restructured block among the blocks selected for compaction.
+ *
+ * @param segmentMapping mapping of segment id to its task-wise block info
+ * @param tableLastUpdatedTime latest schema-update timestamp of the table
+ * @return true if at least one selected block was written before the last schema change
+ */
+ public static boolean checkIfAnyRestructuredBlockExists(Map<String, TaskBlockInfo> segmentMapping,
+ long tableLastUpdatedTime) {
+ boolean restructuredBlockExists = false;
+ for (Map.Entry<String, TaskBlockInfo> taskMap : segmentMapping.entrySet()) {
+ String segmentId = taskMap.getKey();
+ TaskBlockInfo taskBlockInfo = taskMap.getValue();
+ Collection<List<TableBlockInfo>> infoList = taskBlockInfo.getAllTableBlockInfoList();
+ for (List<TableBlockInfo> listMetadata : infoList) {
+ for (TableBlockInfo blockInfo : listMetadata) {
+ // if schema modified timestamp is greater than footer stored schema timestamp,
+ // it indicates it is a restructured block
+ if (tableLastUpdatedTime > blockInfo.getDetailInfo().getSchemaUpdatedTimeStamp()) {
+ restructuredBlockExists = true;
+ break;
+ }
+ }
+ }
+ if (restructuredBlockExists) {
+ break;
+ }
+ }
+ return restructuredBlockExists;
+ }
}
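For readers who prefer it compact, a behavior-equivalent sketch of checkIfAnyRestructuredBlockExists in Scala (illustrative, simplified types; the Java version above is the real code): a block counts as restructured when the table's schema was updated after the schema timestamp recorded in the block's footer.

final case class BlockDetail(schemaUpdatedTimeStamp: Long)

// segment id -> per-task lists of block details
def anyRestructuredBlock(
    segmentMapping: Map[String, Seq[Seq[BlockDetail]]],
    tableLastUpdatedTime: Long): Boolean =
  segmentMapping.values.exists { taskBlockLists =>
    taskBlockLists.exists { blocks =>
      blocks.exists(b => tableLastUpdatedTime > b.schemaUpdatedTimeStamp)
    }
  }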
[32/50] [abbrv] carbondata git commit: modify compare test
Posted by ja...@apache.org.
modify compare test
fix
fix style
change table
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/327b307f
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/327b307f
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/327b307f
Branch: refs/heads/datamap
Commit: 327b307fdddc7b0fffe5b86049d1a2d08dfb182a
Parents: d9c3b48
Author: jackylk <ja...@huawei.com>
Authored: Mon Jul 3 21:54:39 2017 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Wed Jul 5 21:34:56 2017 +0800
----------------------------------------------------------------------
.../carbondata/examples/CompareTest.scala | 103 ++++++++++++-------
1 file changed, 67 insertions(+), 36 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/327b307f/examples/spark2/src/main/scala/org/apache/carbondata/examples/CompareTest.scala
----------------------------------------------------------------------
diff --git a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CompareTest.scala b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CompareTest.scala
index ee53c31..ffc4b22 100644
--- a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CompareTest.scala
+++ b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CompareTest.scala
@@ -41,6 +41,7 @@ case class Query(sqlText: String, queryType: String, desc: String)
object CompareTest {
def parquetTableName: String = "comparetest_parquet"
+ def orcTableName: String = "comparetest_orc"
def carbonTableName(version: String): String = s"comparetest_carbonV$version"
// Table schema:
@@ -63,7 +64,7 @@ object CompareTest {
// +-------------+-----------+-------------+-------------+------------+
// | m4 | double | NA | measure | no |
// +-------------+-----------+-------------+-------------+------------+
- // | m5 | double | NA | measure | no |
+ // | m5 | decimal | NA | measure | no |
// +-------------+-----------+-------------+-------------+------------+
private def generateDataFrame(spark: SparkSession): DataFrame = {
val r = new Random()
@@ -71,10 +72,11 @@ object CompareTest {
.parallelize(1 to 10 * 1000 * 1000, 4)
.map { x =>
("city" + x % 8, "country" + x % 1103, "planet" + x % 10007, "IDENTIFIER" + x.toString,
- (x % 16).toShort, x / 2, (x << 1).toLong, x.toDouble / 13, x.toDouble / 11)
+ (x % 16).toShort, x / 2, (x << 1).toLong, x.toDouble / 13,
+ BigDecimal.valueOf(x.toDouble / 11))
}.map { x =>
- Row(x._1, x._2, x._3, x._4, x._5, x._6, x._7, x._8, x._9)
- }
+ Row(x._1, x._2, x._3, x._4, x._5, x._6, x._7, x._8, x._9)
+ }
val schema = StructType(
Seq(
@@ -86,7 +88,7 @@ object CompareTest {
StructField("m2", IntegerType, nullable = false),
StructField("m3", LongType, nullable = false),
StructField("m4", DoubleType, nullable = false),
- StructField("m5", DoubleType, nullable = false)
+ StructField("m5", DecimalType(30, 10), nullable = false)
)
)
@@ -142,12 +144,12 @@ object CompareTest {
// == FULL SCAN GROUP BY AGGREGATE ==
// ===========================================================================
Query(
- "select country, sum(m1) from $table group by country",
+ "select country, sum(m1) as metric from $table group by country order by metric",
"aggregate",
"group by on big data, on medium card column, medium result set,"
),
Query(
- "select city, sum(m1) from $table group by city",
+ "select city, sum(m1) as metric from $table group by city order by metric",
"aggregate",
"group by on big data, on low card column, small result set,"
),
@@ -170,17 +172,20 @@ object CompareTest {
// == FILTER SCAN GROUP BY AGGREGATION ==
// ===========================================================================
Query(
- "select country, sum(m1) from $table where city='city8' group by country ",
+ "select country, sum(m1) as metric from $table where city='city8' group by country " +
+ "order by metric",
"filter scan and aggregate",
"group by on large data, small result set"
),
Query(
- "select id, sum(m1) from $table where planet='planet10' group by id",
+ "select id, sum(m1) as metric from $table where planet='planet10' group by id " +
+ "order by metric",
"filter scan and aggregate",
"group by on medium data, large result set"
),
Query(
- "select city, sum(m1) from $table where country='country12' group by city ",
+ "select city, sum(m1) as metric from $table where country='country12' group by city " +
+ "order by metric",
"filter scan and aggregate",
"group by on medium data, small result set"
),
@@ -244,25 +249,35 @@ object CompareTest {
)
)
- private def loadParquetTable(spark: SparkSession, input: DataFrame): Double = time {
+ private def loadParquetTable(spark: SparkSession, input: DataFrame, table: String)
+ : Double = time {
// partitioned by last 1 digit of id column
val dfWithPartition = input.withColumn("partitionCol", input.col("id").%(10))
dfWithPartition.write
.partitionBy("partitionCol")
.mode(SaveMode.Overwrite)
- .parquet(parquetTableName)
+ .parquet(table)
+ spark.read.parquet(table).registerTempTable(table)
+ }
+
+ private def loadOrcTable(spark: SparkSession, input: DataFrame, table: String): Double = time {
+ // unlike the parquet table, the orc table is written without partitioning
+ input.write
+ .mode(SaveMode.Overwrite)
+ .orc(table)
+ spark.read.orc(table).registerTempTable(table)
}
- private def loadCarbonTable(spark: SparkSession, input: DataFrame, version: String): Double = {
+ private def loadCarbonTable(spark: SparkSession, input: DataFrame, tableName: String): Double = {
CarbonProperties.getInstance().addProperty(
CarbonCommonConstants.CARBON_DATA_FILE_VERSION,
- version
+ "3"
)
- spark.sql(s"drop table if exists ${carbonTableName(version)}")
+ spark.sql(s"drop table if exists $tableName")
time {
input.write
.format("carbondata")
- .option("tableName", carbonTableName(version))
+ .option("tableName", tableName)
.option("tempCSV", "false")
.option("single_pass", "true")
.option("dictionary_exclude", "id") // id is high cardinality column
@@ -273,18 +288,23 @@ object CompareTest {
}
// load data into parquet, carbonV2, carbonV3
- private def prepareTable(spark: SparkSession): Unit = {
+ private def prepareTable(spark: SparkSession, table1: String, table2: String): Unit = {
val df = generateDataFrame(spark).cache
println(s"loading ${df.count} records, schema: ${df.schema}")
- val loadParquetTime = loadParquetTable(spark, df)
- val loadCarbonV3Time = loadCarbonTable(spark, df, version = "3")
- println(s"load completed, time: $loadParquetTime, $loadCarbonV3Time")
+ val table1Time = if (table1.endsWith("parquet")) {
+ loadParquetTable(spark, df, table1)
+ } else if (table1.endsWith("orc")) {
+ loadOrcTable(spark, df, table1)
+ } else {
+ sys.error("invalid table: " + table1)
+ }
+ val table2Time = loadCarbonTable(spark, df, table2)
+ println(s"load completed, time: $table1Time, $table2Time")
df.unpersist()
- spark.read.parquet(parquetTableName).registerTempTable(parquetTableName)
}
// Run all queries for the specified table
- private def runQueries(spark: SparkSession, tableName: String): Array[(Double, Int)] = {
+ private def runQueries(spark: SparkSession, tableName: String): Array[(Double, Array[Row])] = {
println(s"start running queries for $tableName...")
var result: Array[Row] = null
queries.zipWithIndex.map { case (query, index) =>
@@ -294,37 +314,46 @@ object CompareTest {
result = spark.sql(sqlText).collect()
}
println(s"=> $rt sec")
- (rt, result.length)
+ (rt, result)
+ }
+ }
+
+ private def printErrorIfNotMatch(index: Int, table1: String, result1: Array[Row],
+ table2: String, result2: Array[Row]): Unit = {
+ if (!result1.sameElements(result2)) {
+ val num = index + 1
+ println(s"$table1 result for query $num: ")
+ println(s"""${result1.mkString(",")}""")
+ println(s"$table2 result for query $num: ")
+ println(s"""${result2.mkString(",")}""")
+ sys.error(s"result not matching for query $num (${queries(index).desc})")
}
}
// run testcases and print comparison result
- private def runTest(spark: SparkSession): Unit = {
+ private def runTest(spark: SparkSession, table1: String, table2: String): Unit = {
val formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
val date = new Date
val timestamp = date.getTime
// run queries on parquet and carbon
- val parquetResult: Array[(Double, Int)] = runQueries(spark, parquetTableName)
+ val table1Result: Array[(Double, Array[Row])] = runQueries(spark, table1)
// do GC and sleep for some time before running next table
System.gc()
Thread.sleep(1000)
System.gc()
Thread.sleep(1000)
- val carbonResult: Array[(Double, Int)] = runQueries(spark, carbonTableName("3"))
+ val table2Result: Array[(Double, Array[Row])] = runQueries(spark, table2)
// check result by comparing output from parquet and carbon
- parquetResult.zipWithIndex.foreach { case (result, index) =>
- if (result._2 != carbonResult(index)._2) {
- sys.error(s"result not matching for query ${index + 1} (${queries(index).desc}): " +
- s"${result._2} and ${carbonResult(index)._2}")
- }
+ table1Result.zipWithIndex.foreach { case (result, index) =>
+ printErrorIfNotMatch(index, table1, result._2, table2, table2Result(index)._2)
}
// print all response time in JSON format, so that it can be analyzed later
queries.zipWithIndex.foreach { case (query, index) =>
println("{" +
s""""query":"${index + 1}", """ +
- s""""parquetTime":${parquetResult(index)._1}, """ +
- s""""carbonTime":${carbonResult(index)._1}, """ +
- s""""fetched":${parquetResult(index)._2}, """ +
+ s""""$table1 time":${table1Result(index)._1}, """ +
+ s""""$table2 time":${table2Result(index)._1}, """ +
+ s""""fetched":${table1Result(index)._2.length}, """ +
s""""type":"${query.queryType}", """ +
s""""desc":"${query.desc}", """ +
s""""date": "${formatter.format(date)}" """ +
@@ -351,8 +380,10 @@ object CompareTest {
.getOrCreateCarbonSession(storeLocation)
spark.sparkContext.setLogLevel("warn")
- prepareTable(spark)
- runTest(spark)
+ val table1 = parquetTableName
+ val table2 = carbonTableName("3")
+ prepareTable(spark, table1, table2)
+ runTest(spark, table1, table2)
spark.close()
}
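The hunks above rely on a `time { ... }` helper that is not part of this diff; a minimal sketch of what such a helper presumably does (an assumption, not the project's actual implementation) is to run the block once and return the elapsed wall-clock seconds:

// hypothetical helper: evaluates the block and returns elapsed seconds
def time(body: => Unit): Double = {
  val start = System.nanoTime()
  body
  (System.nanoTime() - start).toDouble / 1e9
}

val seconds = time { Thread.sleep(100) }  // roughly 0.1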
[18/50] [abbrv] carbondata git commit: [CARBONDATA-1256] Rectify Vector Buffer Overflow Calculation. This closes #1111
Posted by ja...@apache.org.
[CARBONDATA-1256] Rectify Vector Buffer Overflow Calculation. This closes #1111
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/27d520cc
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/27d520cc
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/27d520cc
Branch: refs/heads/datamap
Commit: 27d520cc0be71ab23c705e8c2e2a8fdcdbe589be
Parents: 924f0b7 03d484a
Author: ravipesala <ra...@gmail.com>
Authored: Mon Jul 3 15:07:37 2017 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Mon Jul 3 15:07:37 2017 +0530
----------------------------------------------------------------------
.../collector/impl/DictionaryBasedVectorResultCollector.java | 5 +++--
.../collector/impl/RestructureBasedVectorResultCollector.java | 2 +-
2 files changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
[17/50] [abbrv] carbondata git commit: Rectify Vector Buffer Calculation
Posted by ja...@apache.org.
Rectify Vector Buffer Calculation
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/03d484ab
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/03d484ab
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/03d484ab
Branch: refs/heads/datamap
Commit: 03d484abf2cf5c57aeda11ca9355cfbfbdf137c2
Parents: 924f0b7
Author: sounakr <so...@gmail.com>
Authored: Thu Jun 29 01:15:21 2017 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Mon Jul 3 15:06:43 2017 +0530
----------------------------------------------------------------------
.../collector/impl/DictionaryBasedVectorResultCollector.java | 5 +++--
.../collector/impl/RestructureBasedVectorResultCollector.java | 2 +-
2 files changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/03d484ab/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java
index 73ccb5d..c857a47 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DictionaryBasedVectorResultCollector.java
@@ -139,8 +139,9 @@ public class DictionaryBasedVectorResultCollector extends AbstractScannedResultC
}
int rowCounter = scannedResult.getRowCounter();
int availableRows = currentPageRowCount - rowCounter;
- int requiredRows =
- columnarBatch.getBatchSize() - (columnarBatch.getActualSize() + filteredRows);
+ // getRowCounter holds the total number of rows already placed in the vector. Calculate the
+ // left-over space through getRowCounter only.
+ int requiredRows = columnarBatch.getBatchSize() - columnarBatch.getRowCounter();
requiredRows = Math.min(requiredRows, availableRows);
if (requiredRows < 1) {
return;
http://git-wip-us.apache.org/repos/asf/carbondata/blob/03d484ab/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedVectorResultCollector.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedVectorResultCollector.java b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedVectorResultCollector.java
index 6f45c47..8ae0d96 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedVectorResultCollector.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RestructureBasedVectorResultCollector.java
@@ -103,7 +103,7 @@ public class RestructureBasedVectorResultCollector extends DictionaryBasedVector
}
int rowCounter = scannedResult.getRowCounter();
int availableRows = currentPageRowCount - rowCounter;
- int requiredRows = columnarBatch.getBatchSize() - columnarBatch.getActualSize();
+ int requiredRows = columnarBatch.getBatchSize() - columnarBatch.getRowCounter();
requiredRows = Math.min(requiredRows, availableRows);
if (requiredRows < 1) {
return;
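A worked example of the corrected buffer calculation (numbers are illustrative): with a batch size of 32 and 10 rows already placed in the vector, 22 slots remain; if the current page only has 15 unread rows, requiredRows is capped at 15, which prevents the overflow the earlier getActualSize-based formula allowed.

val batchSize       = 32
val batchRowCounter = 10  // rows already placed in the columnar batch
val availableRows   = 15  // rows left in the current page

val requiredRows = math.min(batchSize - batchRowCounter, availableRows)  // 15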
[39/50] [abbrv] carbondata git commit: fix doc, remove invalid description
Posted by ja...@apache.org.
fix doc, remove invalid description
This closes #1151
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0558c286
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0558c286
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0558c286
Branch: refs/heads/datamap
Commit: 0558c286b3935d44a095f64677dc6abf276054fd
Parents: ff7eba0
Author: Liang Chen <ch...@apache.org>
Authored: Sun Jul 9 06:32:10 2017 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Sun Jul 9 06:35:26 2017 +0800
----------------------------------------------------------------------
docs/configuration-parameters.md | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/0558c286/docs/configuration-parameters.md
----------------------------------------------------------------------
diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md
index deb5924..19f3f1a 100644
--- a/docs/configuration-parameters.md
+++ b/docs/configuration-parameters.md
@@ -144,6 +144,5 @@ This section provides the details of all the configurations required for CarbonD
|----------------------------------------|--------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| spark.driver.memory | 1g | Amount of memory to be used by the driver process. |
| spark.executor.memory | 1g | Amount of memory to be used per executor process. |
-| spark.sql.bigdata.register.analyseRule | org.apache.spark.sql.hive.acl.CarbonAccessControlRules | CarbonAccessControlRules need to be set for enabling Access Control. |
-
\ No newline at end of file
+
[37/50] [abbrv] carbondata git commit: [CARBONDATA-1269][PRESTO] Fixed bug for select operation in a non-existing database
Posted by ja...@apache.org.
[CARBONDATA-1269][PRESTO] Fixed bug for select operation in a non-existing database
This closes #1143
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c7e7f70b
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c7e7f70b
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c7e7f70b
Branch: refs/heads/datamap
Commit: c7e7f70b827aba3fbd3aab849cc2a50257627cd5
Parents: 92d1d97
Author: Geetika Gupta <ge...@knoldus.in>
Authored: Thu Jul 6 11:44:06 2017 +0530
Committer: jackylk <ja...@huawei.com>
Committed: Fri Jul 7 08:41:16 2017 +0800
----------------------------------------------------------------------
.../main/java/org/apache/carbondata/presto/CarbondataMetadata.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c7e7f70b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataMetadata.java
----------------------------------------------------------------------
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataMetadata.java b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataMetadata.java
index f2d19cf..a132fb4 100755
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataMetadata.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataMetadata.java
@@ -111,7 +111,7 @@ public class CarbondataMetadata implements ConnectorMetadata {
private ConnectorTableMetadata getTableMetadata(SchemaTableName schemaTableName) {
if (!listSchemaNamesInternal().contains(schemaTableName.getSchemaName())) {
- return null;
+ throw new SchemaNotFoundException(schemaTableName.getSchemaName());
}
CarbonTable carbonTable = carbonTableReader.getTable(schemaTableName);
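The one-line change above swaps a silent `return null` for a fail-fast exception, so a query against a non-existing schema surfaces a clear error instead of a downstream NullPointerException; a minimal sketch of the pattern (illustrative, not the Presto SPI):

def tableMetadata(knownSchemas: Set[String], schema: String): String = {
  if (!knownSchemas.contains(schema)) {
    throw new NoSuchElementException(s"schema not found: $schema")  // fail fast
  }
  s"metadata for $schema"
}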
[14/50] [abbrv] carbondata git commit: [CARBONDATA-1248] change LazyColumnPage parent class. This closes #1114
Posted by ja...@apache.org.
[CARBONDATA-1248] change LazyColumnPage parent class. This closes #1114
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/92ba101d
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/92ba101d
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/92ba101d
Branch: refs/heads/datamap
Commit: 92ba101da43f9047d8384e764b95b991ae939b8e
Parents: 3e726e0 53b92e5
Author: QiangCai <qi...@qq.com>
Authored: Sat Jul 1 19:05:06 2017 +0800
Committer: QiangCai <qi...@qq.com>
Committed: Sat Jul 1 19:05:06 2017 +0800
----------------------------------------------------------------------
.../core/datastore/page/LazyColumnPage.java | 164 ++++++++++++++++++-
1 file changed, 163 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
[38/50] [abbrv] carbondata git commit: [CARBONDATA-1280] Solve HiveExample dependency issues and fix spark 1.6 CI
Posted by ja...@apache.org.
[CARBONDATA-1280] Solve HiveExample dependency issues and fix spark 1.6 CI
This closes #1150
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/ff7eba0b
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/ff7eba0b
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/ff7eba0b
Branch: refs/heads/datamap
Commit: ff7eba0b838bbf4b9361d5a6a0859d7af104b9a5
Parents: c7e7f70
Author: chenliang613 <ch...@apache.org>
Authored: Sat Jul 8 23:53:02 2017 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Sat Jul 8 23:58:57 2017 +0800
----------------------------------------------------------------------
integration/hive/pom.xml | 18 ++++++++++++++++++
pom.xml | 6 ++++--
2 files changed, 22 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/ff7eba0b/integration/hive/pom.xml
----------------------------------------------------------------------
diff --git a/integration/hive/pom.xml b/integration/hive/pom.xml
index 5a33958..3f1d6ef 100644
--- a/integration/hive/pom.xml
+++ b/integration/hive/pom.xml
@@ -78,6 +78,24 @@
<groupId>org.apache.carbondata</groupId>
<artifactId>carbondata-spark2</artifactId>
<version>${project.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-hive-thriftserver_2.10</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-repl_2.10</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-sql_2.10</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-sql_${scala.binary.version}</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
http://git-wip-us.apache.org/repos/asf/carbondata/blob/ff7eba0b/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 7065bee..7af7b64 100644
--- a/pom.xml
+++ b/pom.xml
@@ -103,8 +103,6 @@
<module>integration/spark-common-test</module>
<module>assembly</module>
<module>examples/flink</module>
- <module>integration/hive</module>
- <module>integration/presto</module>
</modules>
@@ -312,6 +310,8 @@
<module>examples/spark</module>
<module>integration/spark2</module>
<module>examples/spark2</module>
+ <module>integration/hive</module>
+ <module>integration/presto</module>
</modules>
</profile>
<profile>
@@ -363,6 +363,8 @@
</properties>
<modules>
<module>integration/spark2</module>
+ <module>integration/hive</module>
+ <module>integration/presto</module>
<module>examples/spark2</module>
</modules>
</profile>
[09/50] [abbrv] carbondata git commit: add test case for records
exceeding one pagesize
Posted by ja...@apache.org.
add test case for records exceeding one pagesize
delete unnecessary test case
add test case for IUD feature
add test case for IUD feature
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/7e2e86e5
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/7e2e86e5
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/7e2e86e5
Branch: refs/heads/datamap
Commit: 7e2e86e5c156fde3a0fa028a081e4caf2724513c
Parents: 28f8a0b
Author: chenerlu <ch...@huawei.com>
Authored: Thu Jun 29 11:36:10 2017 +0800
Committer: jackylk <ja...@huawei.com>
Committed: Fri Jun 30 20:33:49 2017 +0800
----------------------------------------------------------------------
.../iud/DeleteCarbonTableTestCase.scala | 31 ++++++++++++++++++--
.../iud/UpdateCarbonTableTestCase.scala | 2 ++
2 files changed, 30 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7e2e86e5/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/DeleteCarbonTableTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/DeleteCarbonTableTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/DeleteCarbonTableTestCase.scala
index 2e59c9c..d24f7b2 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/DeleteCarbonTableTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/DeleteCarbonTableTestCase.scala
@@ -16,12 +16,10 @@
*/
package org.apache.carbondata.spark.testsuite.iud
-import org.apache.spark.sql.Row
+import org.apache.spark.sql.{Row, SaveMode}
import org.apache.spark.sql.common.util.QueryTest
import org.scalatest.BeforeAndAfterAll
-import org.apache.carbondata.core.constants.CarbonCommonConstants
-import org.apache.carbondata.core.util.CarbonProperties
class DeleteCarbonTableTestCase extends QueryTest with BeforeAndAfterAll {
override def beforeAll {
@@ -105,6 +103,33 @@ class DeleteCarbonTableTestCase extends QueryTest with BeforeAndAfterAll {
Seq(Row(3))
)
}
+
+ test("Records more than one pagesize after delete operation ") {
+ sql("DROP TABLE IF EXISTS default.carbon2")
+ import sqlContext.implicits._
+ val df = sqlContext.sparkContext.parallelize(1 to 2000000)
+ .map(x => (x+"a", "b", x))
+ .toDF("c1", "c2", "c3")
+ df.write
+ .format("carbondata")
+ .option("tableName", "carbon2")
+ .option("tempCSV", "true")
+ .option("compress", "true")
+ .mode(SaveMode.Overwrite)
+ .save()
+
+ checkAnswer(sql("select count(*) from default.carbon2"), Seq(Row(2000000)))
+
+ sql("delete from default.carbon2 where c1 = '99999a'").show()
+
+ checkAnswer(sql("select count(*) from default.carbon2"), Seq(Row(1999999)))
+
+ checkAnswer(sql("select * from default.carbon2 where c1 = '99999a'"), Seq())
+
+ sql("DROP TABLE IF EXISTS default.carbon2")
+ }
+
+
override def afterAll {
sql("use default")
sql("drop database if exists iud_db cascade")
http://git-wip-us.apache.org/repos/asf/carbondata/blob/7e2e86e5/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/UpdateCarbonTableTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/UpdateCarbonTableTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/UpdateCarbonTableTestCase.scala
index 79fda30..c8a1c63 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/UpdateCarbonTableTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/UpdateCarbonTableTestCase.scala
@@ -406,6 +406,8 @@ class UpdateCarbonTableTestCase extends QueryTest with BeforeAndAfterAll {
checkAnswer(sql("select count(*) from default.carbon1"), Seq(Row(36000)))
+ checkAnswer(sql("select * from default.carbon1 where c1 = 'test123'"), Row("test123","b",9999))
+
sql("DROP TABLE IF EXISTS default.carbon1")
}
[20/50] [abbrv] carbondata git commit: [CARBONDATA-1253] Sort_columns
should not support float, double, decimal
Posted by ja...@apache.org.
[CARBONDATA-1253] Sort_columns should not support float, double, decimal
This closes #1122
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/5f9741eb
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/5f9741eb
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/5f9741eb
Branch: refs/heads/datamap
Commit: 5f9741ebcb5b2b606f4d710785ce97c4d6b49229
Parents: 0d46976
Author: QiangCai <qi...@qq.com>
Authored: Fri Jun 30 19:51:19 2017 +0800
Committer: jackylk <ja...@huawei.com>
Committed: Mon Jul 3 20:53:20 2017 +0800
----------------------------------------------------------------------
...feVariableLengthDimensionDataChunkStore.java | 12 ---
...afeVariableLengthDimesionDataChunkStore.java | 12 ---
.../apache/carbondata/core/util/ByteUtil.java | 81 --------------------
.../carbondata/core/util/DataTypeUtil.java | 12 ---
.../spark/sql/catalyst/CarbonDDLSqlParser.scala | 12 ++-
5 files changed, 10 insertions(+), 119 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/5f9741eb/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
index a65d745..2079811 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java
@@ -25,10 +25,6 @@ import org.apache.carbondata.core.util.ByteUtil;
import org.apache.spark.sql.types.BooleanType;
import org.apache.spark.sql.types.DataType;
-import org.apache.spark.sql.types.Decimal;
-import org.apache.spark.sql.types.DecimalType;
-import org.apache.spark.sql.types.DoubleType;
-import org.apache.spark.sql.types.FloatType;
import org.apache.spark.sql.types.IntegerType;
import org.apache.spark.sql.types.LongType;
import org.apache.spark.sql.types.ShortType;
@@ -157,16 +153,8 @@ public class SafeVariableLengthDimensionDataChunkStore extends SafeAbsractDimens
vector.putShort(vectorRow, ByteUtil.toShort(data, currentDataOffset, length));
} else if (dt instanceof IntegerType) {
vector.putInt(vectorRow, ByteUtil.toInt(data, currentDataOffset, length));
- } else if (dt instanceof FloatType) {
- vector.putFloat(vectorRow, ByteUtil.toFloat(data, currentDataOffset));
- } else if (dt instanceof DoubleType) {
- vector.putDouble(vectorRow, ByteUtil.toDouble(data, currentDataOffset));
} else if (dt instanceof LongType) {
vector.putLong(vectorRow, ByteUtil.toLong(data, currentDataOffset, length));
- } else if (dt instanceof DecimalType) {
- vector.putDecimal(vectorRow,
- Decimal.apply(ByteUtil.toBigDecimal(data, currentDataOffset, length)),
- DecimalType.MAX_PRECISION());
}
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/5f9741eb/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
index 03ba34e..c6c98f9 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimesionDataChunkStore.java
@@ -26,10 +26,6 @@ import org.apache.carbondata.core.util.ByteUtil;
import org.apache.spark.sql.types.BooleanType;
import org.apache.spark.sql.types.DataType;
-import org.apache.spark.sql.types.Decimal;
-import org.apache.spark.sql.types.DecimalType;
-import org.apache.spark.sql.types.DoubleType;
-import org.apache.spark.sql.types.FloatType;
import org.apache.spark.sql.types.IntegerType;
import org.apache.spark.sql.types.LongType;
import org.apache.spark.sql.types.ShortType;
@@ -184,16 +180,8 @@ public class UnsafeVariableLengthDimesionDataChunkStore
vector.putShort(vectorRow, ByteUtil.toShort(value, 0, value.length));
} else if (dt instanceof IntegerType) {
vector.putInt(vectorRow, ByteUtil.toInt(value, 0, value.length));
- } else if (dt instanceof FloatType) {
- vector.putFloat(vectorRow, ByteUtil.toFloat(value, 0));
- } else if (dt instanceof DoubleType) {
- vector.putDouble(vectorRow, ByteUtil.toDouble(value, 0));
} else if (dt instanceof LongType) {
vector.putLong(vectorRow, ByteUtil.toLong(value, 0, value.length));
- } else if (dt instanceof DecimalType) {
- vector.putDecimal(vectorRow,
- Decimal.apply(ByteUtil.toBigDecimal(value, 0, value.length)),
- DecimalType.MAX_PRECISION());
}
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/5f9741eb/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java b/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java
index 2761d71..b74f206 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java
@@ -18,8 +18,6 @@
package org.apache.carbondata.core.util;
import java.io.UnsupportedEncodingException;
-import java.math.BigDecimal;
-import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
@@ -39,8 +37,6 @@ public final class ByteUtil {
public static final String UTF8_CSN = StandardCharsets.UTF_8.name();
- public static final byte[] ZERO_IN_BYTES = toBytes(0);
-
private ByteUtil() {
}
@@ -531,28 +527,6 @@ public final class ByteUtil {
}
/**
- * float => byte[]
- *
- * @param f
- * @return
- */
- public static byte[] toBytes(final float f) {
- // Encode it as int
- return toBytes(Float.floatToRawIntBits(f));
- }
-
- /**
- * byte[] => float
- *
- * @param bytes
- * @param offset
- * @return
- */
- public static float toFloat(byte[] bytes, int offset) {
- return Float.intBitsToFloat(toInt(bytes, offset, SIZEOF_INT));
- }
-
- /**
* long => byte[]
*
* @param val
@@ -593,61 +567,6 @@ public final class ByteUtil {
return l ^ Long.MIN_VALUE;
}
- /**
- * doube => byte[]
- *
- * @param d
- * @return
- */
- public static byte[] toBytes(final double d) {
- // Encode it as a long
- return toBytes(Double.doubleToRawLongBits(d));
- }
-
- /**
- * byte[] => double
- *
- * @param bytes
- * @param offset
- * @return
- */
- public static double toDouble(final byte[] bytes, final int offset) {
- return Double.longBitsToDouble(toLong(bytes, offset, SIZEOF_LONG));
- }
-
- /**
- * BigDecimal => byte[]
- *
- * @param val
- * @return
- */
- public static byte[] toBytes(BigDecimal val) {
- byte[] valueBytes = val.unscaledValue().toByteArray();
- byte[] result = new byte[valueBytes.length + SIZEOF_INT];
- int offset = putInt(result, 0, val.scale());
- putBytes(result, offset, valueBytes, 0, valueBytes.length);
- return result;
- }
-
- /**
- * byte[] => BigDecimal
- *
- * @param bytes
- * @param offset
- * @param length
- * @return
- */
- public static BigDecimal toBigDecimal(byte[] bytes, int offset, final int length) {
- if (bytes == null || length < SIZEOF_INT + 1 || (offset + length > bytes.length)) {
- return null;
- }
-
- int scale = toInt(bytes, offset, bytes.length);
- byte[] tcBytes = new byte[length - SIZEOF_INT];
- System.arraycopy(bytes, offset + SIZEOF_INT, tcBytes, 0, length - SIZEOF_INT);
- return new BigDecimal(new BigInteger(tcBytes), scale);
- }
-
private static IllegalArgumentException explainWrongLengthOrOffset(final byte[] bytes,
final int offset, final int length, final int expectedLength) {
String reason;
http://git-wip-us.apache.org/repos/asf/carbondata/blob/5f9741eb/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
index 1b1884e..37ae5bb 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java
@@ -334,14 +334,8 @@ public final class DataTypeUtil {
return ByteUtil.toBytes(Short.parseShort(dimensionValue));
case INT:
return ByteUtil.toBytes(Integer.parseInt(dimensionValue));
- case FLOAT:
- return ByteUtil.toBytes(Float.parseFloat(dimensionValue));
case LONG:
return ByteUtil.toBytes(Long.parseLong(dimensionValue));
- case DOUBLE:
- return ByteUtil.toBytes(Double.parseDouble(dimensionValue));
- case DECIMAL:
- return ByteUtil.toBytes(new BigDecimal(dimensionValue));
default:
return ByteUtil.toBytes(dimensionValue);
}
@@ -372,14 +366,8 @@ public final class DataTypeUtil {
return ByteUtil.toShort(dataInBytes, 0, dataInBytes.length);
case INT:
return ByteUtil.toInt(dataInBytes, 0, dataInBytes.length);
- case FLOAT:
- return ByteUtil.toFloat(dataInBytes, 0);
case LONG:
return ByteUtil.toLong(dataInBytes, 0, dataInBytes.length);
- case DOUBLE:
- return ByteUtil.toDouble(dataInBytes, 0);
- case DECIMAL:
- return ByteUtil.toBigDecimal(dataInBytes, 0, dataInBytes.length);
default:
return ByteUtil.toString(dataInBytes, 0, dataInBytes.length);
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/5f9741eb/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
index c565c31..8207a9d 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
@@ -551,8 +551,8 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
} else {
val dataType = fields.find(x =>
x.column.equalsIgnoreCase(column)).get.dataType.get
- if (isComplexDimDictionaryExclude(dataType)) {
- val errormsg = "sort_columns is unsupported for complex datatype column: " + column
+ if (isDataTypeSupportedForSortColumn(dataType)) {
+ val errormsg = s"sort_columns is unsupported for ${dataType} datatype column: " + column
throw new MalformedCarbonCommandException(errormsg)
}
}
@@ -691,6 +691,14 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
}
/**
+ * detects whether datatype is part of sort_column
+ */
+ private def isDataTypeSupportedForSortColumn(columnDataType: String): Boolean = {
+ val dataTypes = Array("array", "struct", "double", "float", "decimal")
+ dataTypes.exists(x => x.equalsIgnoreCase(columnDataType))
+ }
+
+ /**
* detects whether datatype is part of dictionary_exclude
*/
def isDataTypeSupportedForDictionary_Exclude(columnDataType: String): Boolean = {
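With this validation in place, DDL that puts a float, double, or decimal column into sort_columns is rejected at parse time. A hedged sketch of the user-visible effect (table and column names are made up; assumes a Spark session named spark wired to CarbonData):

    // The parser now raises MalformedCarbonCommandException for this DDL.
    spark.sql(
      """CREATE TABLE sales (id INT, price DOUBLE, city STRING)
        |STORED BY 'carbondata'
        |TBLPROPERTIES ('SORT_COLUMNS'='price')""".stripMargin)
    // => sort_columns is unsupported for double datatype column: price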
[06/50] [abbrv] carbondata git commit: [CARBONDATA-1181] Support Show
partitions DDL. This closes #1094.
Posted by ja...@apache.org.
[CARBONDATA-1181] Support Show partitions DDL. This closes #1094.
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/126a041a
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/126a041a
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/126a041a
Branch: refs/heads/datamap
Commit: 126a041aa564b0457c03af09a49a7b99a8afd99e
Parents: c2b39b2 c3bfc4a
Author: Venkata Ramana G <ra...@huawei.com>
Authored: Thu Jun 29 13:16:02 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Thu Jun 29 13:16:02 2017 +0530
----------------------------------------------------------------------
.../examples/CarbonPartitionExample.scala | 147 +++++++++++++
.../examples/CarbonPartitionExample.scala | 49 ++++-
.../partition/TestShowPartitions.scala | 216 +++++++++++++++++++
.../carbondata/spark/util/CommonUtil.scala | 47 ++++
.../spark/sql/catalyst/CarbonDDLSqlParser.scala | 1 +
.../spark/sql/CarbonCatalystOperators.scala | 9 +-
.../org/apache/spark/sql/CarbonSqlParser.scala | 10 +-
.../execution/command/carbonTableSchema.scala | 28 +++
.../spark/sql/hive/CarbonStrategies.scala | 16 ++
.../sql/execution/command/DDLStrategy.scala | 8 +
.../execution/command/carbonTableSchema.scala | 28 +++
11 files changed, 553 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
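A hedged usage sketch of the new DDL (table name is made up; assumes a partitioned CarbonData table already exists):

    // List the partitions of a partitioned CarbonData table.
    spark.sql("SHOW PARTITIONS partition_table").show()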
[08/50] [abbrv] carbondata git commit: [CARBONDATA-1236] Support
absolute path without URI scheme in loading. This closes #1118.
Posted by ja...@apache.org.
[CARBONDATA-1236] Support absolute path without URI scheme in loading. This closes #1118.
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/28f8a0b3
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/28f8a0b3
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/28f8a0b3
Branch: refs/heads/datamap
Commit: 28f8a0b376517be1b3b043f0254ad1899d263147
Parents: 126a041 c671c5b
Author: Venkata Ramana G <ra...@huawei.com>
Authored: Thu Jun 29 18:57:58 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Thu Jun 29 18:57:58 2017 +0530
----------------------------------------------------------------------
.../apache/carbondata/core/util/CarbonUtil.java | 50 +++++------
.../carbondata/core/util/CarbonUtilTest.java | 89 +++++++++++++++++++-
2 files changed, 112 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
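A hedged sketch of a load this change accepts (path and table name are made up): an absolute path carrying no URI scheme such as hdfs:// or file://.

    // Absolute path without a URI scheme, now supported for loading.
    spark.sql("LOAD DATA INPATH '/home/user/data/sample.csv' INTO TABLE t")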
[25/50] [abbrv] carbondata git commit: single_pass blocked for
global_sort
Posted by ja...@apache.org.
single_pass blocked for global_sort
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/a4083bf1
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/a4083bf1
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/a4083bf1
Branch: refs/heads/datamap
Commit: a4083bf1ada27d257ce2f666018ef361c7f60c1d
Parents: b699ee6
Author: rahulforallp <ra...@knoldus.in>
Authored: Fri Jun 30 00:06:56 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Wed Jul 5 09:00:16 2017 +0530
----------------------------------------------------------------------
.../execution/command/carbonTableSchema.scala | 1 +
.../execution/command/carbonTableSchema.scala | 221 +++++++++++--------
.../store/CarbonFactDataHandlerColumnar.java | 8 +
3 files changed, 137 insertions(+), 93 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/a4083bf1/integration/spark-common/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark-common/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
index fa5a0d6..ee77f35 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
@@ -450,6 +450,7 @@ class TableNewProcessor(cm: TableModel) {
// Setting the boolean value of useInvertedIndex in column schema
val noInvertedIndexCols = cm.noInvertedIdxCols.getOrElse(Seq())
+ LOGGER.info("NoINVERTEDINDEX columns are : " + noInvertedIndexCols.mkString(","))
for (column <- allColumns) {
// When the column is measure or the specified no inverted index column in DDL,
// set useInvertedIndex to false, otherwise true.
http://git-wip-us.apache.org/repos/asf/carbondata/blob/a4083bf1/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
index bb5bdd1..8e7db45 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
@@ -356,6 +356,84 @@ case class LoadTable(
val LOGGER = LogServiceFactory.getLogService(this.getClass.getCanonicalName)
+ private def getFinalOptions(carbonProperty: CarbonProperties): scala.collection
+ .mutable.Map[String, String] = {
+ var optionsFinal = scala.collection.mutable.Map[String, String]()
+ optionsFinal.put("delimiter", options.getOrElse("delimiter", ","))
+ optionsFinal.put("quotechar", options.getOrElse("quotechar", "\""))
+ optionsFinal.put("fileheader", options.getOrElse("fileheader", ""))
+ optionsFinal.put("escapechar", options.getOrElse("escapechar", "\\"))
+ optionsFinal.put("commentchar", options.getOrElse("commentchar", "#"))
+ optionsFinal.put("columndict", options.getOrElse("columndict", null))
+ optionsFinal
+ .put("serialization_null_format", options.getOrElse("serialization_null_format", "\\N"))
+ optionsFinal.put("bad_records_logger_enable", options.getOrElse("bad_records_logger_enable",
+ carbonProperty
+ .getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE,
+ CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE_DEFAULT)))
+ val badRecordActionValue = carbonProperty
+ .getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
+ CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION_DEFAULT)
+ optionsFinal.put("bad_records_action", options.getOrElse("bad_records_action", carbonProperty
+ .getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_ACTION,
+ badRecordActionValue)))
+ optionsFinal
+ .put("is_empty_data_bad_record", options.getOrElse("is_empty_data_bad_record", carbonProperty
+ .getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD,
+ CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD_DEFAULT)))
+ optionsFinal.put("all_dictionary_path", options.getOrElse("all_dictionary_path", ""))
+ optionsFinal
+ .put("complex_delimiter_level_1", options.getOrElse("complex_delimiter_level_1", "\\$"))
+ optionsFinal
+ .put("complex_delimiter_level_2", options.getOrElse("complex_delimiter_level_2", "\\:"))
+ optionsFinal.put("dateformat", options.getOrElse("dateformat",
+ carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_DATEFORMAT,
+ CarbonLoadOptionConstants.CARBON_OPTIONS_DATEFORMAT_DEFAULT)))
+
+ optionsFinal.put("global_sort_partitions", options.getOrElse("global_sort_partitions",
+ carbonProperty
+ .getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_GLOBAL_SORT_PARTITIONS, null)))
+
+ optionsFinal.put("maxcolumns", options.getOrElse("maxcolumns", null))
+ optionsFinal.put("sort_scope", options
+ .getOrElse("sort_scope",
+ carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_SORT_SCOPE,
+ carbonProperty.getProperty(CarbonCommonConstants.LOAD_SORT_SCOPE,
+ CarbonCommonConstants.LOAD_SORT_SCOPE_DEFAULT))))
+
+ optionsFinal.put("batch_sort_size_inmb", options.getOrElse("batch_sort_size_inmb",
+ carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_BATCH_SORT_SIZE_INMB,
+ carbonProperty.getProperty(CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB,
+ CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB_DEFAULT))))
+ optionsFinal.put("bad_record_path", options.getOrElse("bad_record_path",
+ carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORD_PATH,
+ carbonProperty.getProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
+ CarbonCommonConstants.CARBON_BADRECORDS_LOC_DEFAULT_VAL))))
+
+ val useOnePass = options.getOrElse("single_pass",
+ carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_SINGLE_PASS,
+ CarbonLoadOptionConstants.CARBON_OPTIONS_SINGLE_PASS_DEFAULT)).trim.toLowerCase match {
+ case "true" =>
+ true
+ case "false" =>
+ // when single_pass = false and if either alldictionarypath
+ // or columnDict is configured the do not allow load
+ if (StringUtils.isNotEmpty(optionsFinal.get("all_dictionary_path").get) ||
+ StringUtils.isNotEmpty(optionsFinal.get("columndict").get)) {
+ throw new MalformedCarbonCommandException(
+ "Can not use all_dictionary_path or columndict without single_pass.")
+ } else {
+ false
+ }
+ case illegal =>
+ LOGGER.error(s"Can't use single_pass, because illegal syntax found: [" + illegal + "] " +
+ "Please set it as 'true' or 'false'")
+ false
+ }
+ optionsFinal.put("single_pass", useOnePass.toString)
+ optionsFinal
+ }
+
private def checkDefaultValue(value: String, default: String) = {
if (StringUtils.isEmpty(value)) {
default
@@ -390,6 +468,7 @@ case class LoadTable(
val carbonProperty: CarbonProperties = CarbonProperties.getInstance()
carbonProperty.addProperty("zookeeper.enable.lock", "false")
+ val optionsFinal = getFinalOptions(carbonProperty)
val carbonLock = CarbonLockFactory
.getCarbonLockObj(relation.tableMeta.carbonTable.getAbsoluteTableIdentifier
.getCarbonTableIdentifier,
@@ -426,66 +505,39 @@ case class LoadTable(
val partitionLocation = relation.tableMeta.storePath + "/partition/" +
relation.tableMeta.carbonTableIdentifier.getDatabaseName + "/" +
relation.tableMeta.carbonTableIdentifier.getTableName + "/"
-
-
val columnar = sparkSession.conf.get("carbon.is.columnar.storage", "true").toBoolean
-
- val delimiter = options.getOrElse("delimiter", ",")
- val quoteChar = options.getOrElse("quotechar", "\"")
- val fileHeader = options.getOrElse("fileheader", "")
- val escapeChar = options.getOrElse("escapechar", "\\")
- val commentChar = options.getOrElse("commentchar", "#")
- val columnDict = options.getOrElse("columndict", null)
- val serializationNullFormat = options.getOrElse("serialization_null_format", "\\N")
- val badRecordsLoggerEnable = options.getOrElse("bad_records_logger_enable",
- carbonProperty
- .getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE,
- CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_LOGGER_ENABLE_DEFAULT))
- val badRecordActionValue = carbonProperty
- .getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION,
- CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION_DEFAULT)
- val badRecordsAction = options.getOrElse("bad_records_action", carbonProperty
- .getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORDS_ACTION,
- badRecordActionValue))
- val isEmptyDataBadRecord = options.getOrElse("is_empty_data_bad_record", carbonProperty
- .getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD,
- CarbonLoadOptionConstants.CARBON_OPTIONS_IS_EMPTY_DATA_BAD_RECORD_DEFAULT))
- val allDictionaryPath = options.getOrElse("all_dictionary_path", "")
- val complex_delimiter_level_1 = options.getOrElse("complex_delimiter_level_1", "\\$")
- val complex_delimiter_level_2 = options.getOrElse("complex_delimiter_level_2", "\\:")
- val dateFormat = options.getOrElse("dateformat",
- carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_DATEFORMAT,
- CarbonLoadOptionConstants.CARBON_OPTIONS_DATEFORMAT_DEFAULT))
+ val sort_scope = optionsFinal.get("sort_scope").get
+ val single_pass = optionsFinal.get("single_pass").get
+ val bad_records_logger_enable = optionsFinal.get("bad_records_logger_enable").get
+ val bad_records_action = optionsFinal.get("bad_records_action").get
+ val bad_record_path = optionsFinal.get("bad_record_path").get
+ val global_sort_partitions = optionsFinal.get("global_sort_partitions").get
+ val dateFormat = optionsFinal.get("dateformat").get
+ val delimeter = optionsFinal.get("delimiter").get
+ val complex_delimeter_level1 = optionsFinal.get("complex_delimiter_level_1").get
+ val complex_delimeter_level2 = optionsFinal.get("complex_delimiter_level_2").get
+ val all_dictionary_path = optionsFinal.get("all_dictionary_path").get
+ val column_dict = optionsFinal.get("columndict").get
+ if (sort_scope.equals("GLOBAL_SORT") &&
+ single_pass.equals("TRUE")) {
+ sys.error("Global_Sort can't be used with single_pass flow")
+ }
ValidateUtil.validateDateFormat(dateFormat, table, tableName)
- val maxColumns = options.getOrElse("maxcolumns", null)
- val sortScope = options
- .getOrElse("sort_scope",
- carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_SORT_SCOPE,
- carbonProperty.getProperty(CarbonCommonConstants.LOAD_SORT_SCOPE,
- CarbonCommonConstants.LOAD_SORT_SCOPE_DEFAULT)))
- ValidateUtil.validateSortScope(table, sortScope)
- val batchSortSizeInMB = options.getOrElse("batch_sort_size_inmb",
- carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_BATCH_SORT_SIZE_INMB,
- carbonProperty.getProperty(CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB,
- CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB_DEFAULT)))
- val bad_record_path = options.getOrElse("bad_record_path",
- carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORD_PATH,
- carbonProperty.getProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC,
- CarbonCommonConstants.CARBON_BADRECORDS_LOC_DEFAULT_VAL)))
- if (badRecordsLoggerEnable.toBoolean ||
- LoggerAction.REDIRECT.name().equalsIgnoreCase(badRecordsAction)) {
+ ValidateUtil.validateSortScope(table, sort_scope)
+
+
+ if (bad_records_logger_enable.toBoolean ||
+ LoggerAction.REDIRECT.name().equalsIgnoreCase(bad_records_action)) {
if (!CarbonUtil.isValidBadStorePath(bad_record_path)) {
sys.error("Invalid bad records location.")
}
}
carbonLoadModel.setBadRecordsLocation(bad_record_path)
- val globalSortPartitions = options.getOrElse("global_sort_partitions",
- carbonProperty
- .getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_GLOBAL_SORT_PARTITIONS, null))
- ValidateUtil.validateGlobalSortPartitions(globalSortPartitions)
- carbonLoadModel.setEscapeChar(checkDefaultValue(escapeChar, "\\"))
- carbonLoadModel.setQuoteChar(checkDefaultValue(quoteChar, "\""))
- carbonLoadModel.setCommentChar(checkDefaultValue(commentChar, "#"))
+
+ ValidateUtil.validateGlobalSortPartitions(global_sort_partitions)
+ carbonLoadModel.setEscapeChar(checkDefaultValue(optionsFinal.get("escapechar").get, "\\"))
+ carbonLoadModel.setQuoteChar(checkDefaultValue(optionsFinal.get("quotechar").get, "\""))
+ carbonLoadModel.setCommentChar(checkDefaultValue(optionsFinal.get("commentchar").get, "#"))
carbonLoadModel.setDateFormat(dateFormat)
carbonLoadModel.setDefaultTimestampFormat(carbonProperty.getProperty(
CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
@@ -495,65 +547,48 @@ case class LoadTable(
CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT))
carbonLoadModel
.setSerializationNullFormat(
- TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName + "," + serializationNullFormat)
+ TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName + "," +
+ optionsFinal.get("serialization_null_format").get)
carbonLoadModel
.setBadRecordsLoggerEnable(
- TableOptionConstant.BAD_RECORDS_LOGGER_ENABLE.getName + "," + badRecordsLoggerEnable)
+ TableOptionConstant.BAD_RECORDS_LOGGER_ENABLE.getName + "," + bad_records_logger_enable)
carbonLoadModel
.setBadRecordsAction(
- TableOptionConstant.BAD_RECORDS_ACTION.getName + "," + badRecordsAction)
+ TableOptionConstant.BAD_RECORDS_ACTION.getName + "," + bad_records_action)
carbonLoadModel
.setIsEmptyDataBadRecord(
- DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD + "," + isEmptyDataBadRecord)
- carbonLoadModel.setSortScope(sortScope)
- carbonLoadModel.setBatchSortSizeInMb(batchSortSizeInMB)
- carbonLoadModel.setGlobalSortPartitions(globalSortPartitions)
- val useOnePass = options.getOrElse("single_pass",
- carbonProperty.getProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_SINGLE_PASS,
- CarbonLoadOptionConstants.CARBON_OPTIONS_SINGLE_PASS_DEFAULT)).trim.toLowerCase match {
- case "true" =>
- true
- case "false" =>
- // when single_pass = false and if either alldictionarypath
- // or columnDict is configured the do not allow load
- if (StringUtils.isNotEmpty(allDictionaryPath) || StringUtils.isNotEmpty(columnDict)) {
- throw new MalformedCarbonCommandException(
- "Can not use all_dictionary_path or columndict without single_pass.")
- } else {
- false
- }
- case illegal =>
- LOGGER.error(s"Can't use single_pass, because illegal syntax found: [" + illegal + "] " +
- "Please set it as 'true' or 'false'")
- false
- }
- carbonLoadModel.setUseOnePass(useOnePass)
- if (delimiter.equalsIgnoreCase(complex_delimiter_level_1) ||
- complex_delimiter_level_1.equalsIgnoreCase(complex_delimiter_level_2) ||
- delimiter.equalsIgnoreCase(complex_delimiter_level_2)) {
+ DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD + "," +
+ optionsFinal.get("is_empty_data_bad_record").get)
+ carbonLoadModel.setSortScope(sort_scope)
+ carbonLoadModel.setBatchSortSizeInMb(optionsFinal.get("batch_sort_size_inmb").get)
+ carbonLoadModel.setGlobalSortPartitions(global_sort_partitions)
+ carbonLoadModel.setUseOnePass(single_pass.toBoolean)
+ if (delimeter.equalsIgnoreCase(complex_delimeter_level1) ||
+ complex_delimeter_level1.equalsIgnoreCase(complex_delimeter_level2) ||
+ delimeter.equalsIgnoreCase(complex_delimeter_level2)) {
sys.error(s"Field Delimiter & Complex types delimiter are same")
}
else {
carbonLoadModel.setComplexDelimiterLevel1(
- CarbonUtil.delimiterConverter(complex_delimiter_level_1))
+ CarbonUtil.delimiterConverter(complex_delimeter_level1))
carbonLoadModel.setComplexDelimiterLevel2(
- CarbonUtil.delimiterConverter(complex_delimiter_level_2))
+ CarbonUtil.delimiterConverter(complex_delimeter_level2))
}
// set local dictionary path, and dictionary file extension
- carbonLoadModel.setAllDictPath(allDictionaryPath)
+ carbonLoadModel.setAllDictPath(all_dictionary_path)
val partitionStatus = CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS
try {
// First system has to partition the data first and then call the load data
LOGGER.info(s"Initiating Direct Load for the Table : ($dbName.$tableName)")
carbonLoadModel.setFactFilePath(factPath)
- carbonLoadModel.setCsvDelimiter(CarbonUtil.unescapeChar(delimiter))
- carbonLoadModel.setCsvHeader(fileHeader)
- carbonLoadModel.setColDictFilePath(columnDict)
+ carbonLoadModel.setCsvDelimiter(CarbonUtil.unescapeChar(delimeter))
+ carbonLoadModel.setCsvHeader(optionsFinal.get("fileheader").get)
+ carbonLoadModel.setColDictFilePath(column_dict)
carbonLoadModel.setDirectLoad(true)
carbonLoadModel.setCsvHeaderColumns(CommonUtil.getCsvHeaderColumns(carbonLoadModel))
val validatedMaxColumns = CommonUtil.validateMaxColumns(carbonLoadModel.getCsvHeaderColumns,
- maxColumns)
+ optionsFinal.get("maxcolumns").get)
carbonLoadModel.setMaxColumns(validatedMaxColumns.toString)
GlobalDictionaryUtil.updateTableMetadataFunc = LoadTable.updateTableMetadata
val storePath = relation.tableMeta.storePath
@@ -561,7 +596,7 @@ case class LoadTable(
CommonUtil.readLoadMetadataDetails(carbonLoadModel, storePath)
}
if (carbonLoadModel.getLoadMetadataDetails.isEmpty && carbonLoadModel.getUseOnePass &&
- StringUtils.isEmpty(columnDict) && StringUtils.isEmpty(allDictionaryPath)) {
+ StringUtils.isEmpty(column_dict) && StringUtils.isEmpty(all_dictionary_path)) {
LOGGER.info(s"Cannot use single_pass=true for $dbName.$tableName during the first load")
LOGGER.audit(s"Cannot use single_pass=true for $dbName.$tableName during the first load")
carbonLoadModel.setUseOnePass(false)
@@ -583,7 +618,7 @@ case class LoadTable(
.generatePredefinedColDictionary(colDictFilePath, carbonTableIdentifier,
dimensions, carbonLoadModel, sparkSession.sqlContext, storePath, dictFolderPath)
}
- if (!StringUtils.isEmpty(allDictionaryPath)) {
+ if (!StringUtils.isEmpty(all_dictionary_path)) {
carbonLoadModel.initPredefDictMap()
GlobalDictionaryUtil
.generateDictionaryFromDictionaryFiles(sparkSession.sqlContext,
@@ -592,7 +627,7 @@ case class LoadTable(
carbonTableIdentifier,
dictFolderPath,
dimensions,
- allDictionaryPath)
+ all_dictionary_path)
}
// dictionaryServerClient dictionary generator
val dictionaryServerPort = carbonProperty
http://git-wip-us.apache.org/repos/asf/carbondata/blob/a4083bf1/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
index 01e3ab6..429c5a3 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
@@ -48,6 +48,7 @@ import org.apache.carbondata.core.metadata.CarbonMetadata;
import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
import org.apache.carbondata.core.util.CarbonProperties;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.NodeHolder;
@@ -201,6 +202,13 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler {
}
this.version = CarbonProperties.getInstance().getFormatVersion();
this.encoder = new TablePageEncoder(model);
+ String noInvertedIdxCol = "";
+ for (CarbonDimension cd : model.getSegmentProperties().getDimensions()) {
+ if (!cd.isUseInvertedIndex()) {
+ noInvertedIdxCol += (cd.getColName() + ",");
+ }
+ }
+ LOGGER.info("Columns considered as NoInverted Index are " + noInvertedIdxCol);
}
private void initParameters(CarbonFactDataHandlerModel model) {
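The new validation in LoadTable targets a GLOBAL_SORT load that also requests single_pass. A hedged sketch of the combination the check is meant to reject (path and table name are made up):

    // Intended to stop with "Global_Sort can't be used with single_pass flow".
    spark.sql(
      """LOAD DATA INPATH '/tmp/sample.csv' INTO TABLE t
        |OPTIONS('SORT_SCOPE'='GLOBAL_SORT', 'SINGLE_PASS'='TRUE')""".stripMargin)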
[03/50] [abbrv] carbondata git commit: add a test case for the
bad record condition with the update operation.
Posted by ja...@apache.org.
add a test case for the bad record condition with the update operation.
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/026ceae2
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/026ceae2
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/026ceae2
Branch: refs/heads/datamap
Commit: 026ceae2025ac32f91dea8a632014233144dd27c
Parents: 480ebb8
Author: ravikiran23 <ra...@gmail.com>
Authored: Wed Jun 28 12:27:46 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Thu Jun 29 12:49:13 2017 +0530
----------------------------------------------------------------------
.../src/test/resources/IUD/badrecord.csv | 3 +
...UpdateCarbonTableTestCaseWithBadRecord.scala | 70 ++++++++++++++++++++
2 files changed, 73 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/026ceae2/integration/spark-common-test/src/test/resources/IUD/badrecord.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/IUD/badrecord.csv b/integration/spark-common-test/src/test/resources/IUD/badrecord.csv
new file mode 100644
index 0000000..6590732
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/IUD/badrecord.csv
@@ -0,0 +1,3 @@
+c1,c2,c3,c5
+ravi,2,kiran,huawei
+manohar,4,vanam,huawei
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/carbondata/blob/026ceae2/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/UpdateCarbonTableTestCaseWithBadRecord.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/UpdateCarbonTableTestCaseWithBadRecord.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/UpdateCarbonTableTestCaseWithBadRecord.scala
new file mode 100644
index 0000000..7cc8ebc
--- /dev/null
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/UpdateCarbonTableTestCaseWithBadRecord.scala
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.spark.testsuite.iud
+
+import org.apache.spark.sql.common.util.QueryTest
+import org.apache.spark.sql.{Row, SaveMode}
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.common.constants.LoggerAction
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+class UpdateCarbonTableTestCaseWithBadRecord extends QueryTest with BeforeAndAfterAll {
+ override def beforeAll {
+
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION , LoggerAction.FORCE.name())
+ }
+
+
+ test("test update operation with Badrecords action as force.") {
+ sql("""drop table if exists badtable""").show
+ sql("""create table badtable (c1 string,c2 int,c3 string,c5 string) STORED BY 'org.apache.carbondata.format'""")
+ sql(s"""LOAD DATA LOCAL INPATH '$resourcesPath/IUD/badrecord.csv' INTO table badtable""")
+ sql("""update badtable d set (d.c2) = (d.c2 / 1)""").show()
+ checkAnswer(
+ sql("""select c1,c2,c3,c5 from badtable"""),
+ Seq(Row("ravi",null,"kiran","huawei"),Row("manohar",null,"vanam","huawei"))
+ )
+ sql("""drop table badtable""").show
+
+
+ }
+ test("test update operation with Badrecords action as FAIL.") {
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION , LoggerAction.FAIL.name())
+ sql("""drop table if exists badtable""").show
+ sql("""create table badtable (c1 string,c2 int,c3 string,c5 string) STORED BY 'org.apache.carbondata.format'""")
+ sql(s"""LOAD DATA LOCAL INPATH '$resourcesPath/IUD/badrecord.csv' INTO table badtable""")
+ val exec = intercept[Exception] {
+ sql("""update badtable d set (d.c2) = (d.c2 / 1)""").show()
+ }
+ checkAnswer(
+ sql("""select c1,c2,c3,c5 from badtable"""),
+ Seq(Row("ravi",2,"kiran","huawei"),Row("manohar",4,"vanam","huawei"))
+ )
+ sql("""drop table badtable""").show
+
+
+ }
+
+ override def afterAll {
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION , CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION_DEFAULT)
+ }
+}
\ No newline at end of file
[02/50] [abbrv] carbondata git commit: [CARBONDATA-1136] Fix
compaction bug for the partition table. This closes #1002.
Posted by ja...@apache.org.
[CARBONDATA-1136] Fix compaction bug for the partition table. This closes #1002.
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/480ebb8d
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/480ebb8d
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/480ebb8d
Branch: refs/heads/datamap
Commit: 480ebb8dd973d6aafb54b4407670a0600fda926b
Parents: c8f742d aecf496
Author: Venkata Ramana G <ra...@huawei.com>
Authored: Thu Jun 29 12:04:11 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Thu Jun 29 12:04:11 2017 +0530
----------------------------------------------------------------------
.../core/metadata/schema/table/CarbonTable.java | 4 +
.../TestCompactionForPartitionTable.scala | 84 ++++++++++++++++++++
.../carbondata/spark/rdd/CarbonMergerRDD.scala | 27 +++++--
.../spark/rdd/CarbonSparkPartition.scala | 3 +-
4 files changed, 109 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
[15/50] [abbrv] carbondata git commit: remove spark.version in
submodule pom.xml
Posted by ja...@apache.org.
remove spark.version in submodule pom.xml
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d3975a92
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d3975a92
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d3975a92
Branch: refs/heads/datamap
Commit: d3975a929e58937d1e23315f1f65c895d325b2ed
Parents: 92ba101
Author: cenyuhai <26...@qq.com>
Authored: Sun Jun 11 17:21:58 2017 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Sat Jul 1 22:45:29 2017 +0800
----------------------------------------------------------------------
examples/spark2/pom.xml | 3 ---
integration/spark2/pom.xml | 3 ---
2 files changed, 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/d3975a92/examples/spark2/pom.xml
----------------------------------------------------------------------
diff --git a/examples/spark2/pom.xml b/examples/spark2/pom.xml
index 75bcef7..1802451 100644
--- a/examples/spark2/pom.xml
+++ b/examples/spark2/pom.xml
@@ -30,9 +30,6 @@
<name>Apache CarbonData :: Spark2 Examples</name>
<properties>
- <spark.version>2.1.0</spark.version>
- <scala.binary.version>2.11</scala.binary.version>
- <scala.version>2.11.8</scala.version>
<dev.path>${basedir}/../../dev</dev.path>
</properties>
http://git-wip-us.apache.org/repos/asf/carbondata/blob/d3975a92/integration/spark2/pom.xml
----------------------------------------------------------------------
diff --git a/integration/spark2/pom.xml b/integration/spark2/pom.xml
index a31513d..8279c5d 100644
--- a/integration/spark2/pom.xml
+++ b/integration/spark2/pom.xml
@@ -30,9 +30,6 @@
<name>Apache CarbonData :: Spark2</name>
<properties>
- <spark.version>2.1.0</spark.version>
- <scala.binary.version>2.11</scala.binary.version>
- <scala.version>2.11.8</scala.version>
<dev.path>${basedir}/../../dev</dev.path>
</properties>
[43/50] [abbrv] carbondata git commit: [CARBONDATA-1289] remove
unused method
Posted by ja...@apache.org.
[CARBONDATA-1289] remove unused method
This closes #1157
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8b31f09b
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8b31f09b
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8b31f09b
Branch: refs/heads/datamap
Commit: 8b31f09b638cb5b8fbbdfa56f29f8c97e68e6aa6
Parents: 403c3d9
Author: czg516516 <cz...@163.com>
Authored: Tue Jul 11 11:01:47 2017 +0800
Committer: jackylk <ja...@huawei.com>
Committed: Tue Jul 11 12:56:18 2017 +0800
----------------------------------------------------------------------
.../newflow/exception/CarbonDataLoadingException.java | 13 -------------
1 file changed, 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/8b31f09b/processing/src/main/java/org/apache/carbondata/processing/newflow/exception/CarbonDataLoadingException.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/exception/CarbonDataLoadingException.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/exception/CarbonDataLoadingException.java
index b9593e7..6ffdd03 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/newflow/exception/CarbonDataLoadingException.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/exception/CarbonDataLoadingException.java
@@ -17,8 +17,6 @@
package org.apache.carbondata.processing.newflow.exception;
-import java.util.Locale;
-
public class CarbonDataLoadingException extends RuntimeException {
/**
* default serial version ID.
@@ -60,17 +58,6 @@ public class CarbonDataLoadingException extends RuntimeException {
}
/**
- * This method is used to get the localized message.
- *
- * @param locale - A Locale object represents a specific geographical,
- * political, or cultural region.
- * @return - Localized error message.
- */
- public String getLocalizedMessage(Locale locale) {
- return "";
- }
-
- /**
* getLocalizedMessage
*/
@Override public String getLocalizedMessage() {
[24/50] [abbrv] carbondata git commit: [CARBONDATA-1244] Polish docs
and comments in presto integration
Posted by ja...@apache.org.
[CARBONDATA-1244] Polish docs and comments in presto integration
This closes #1131
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b699ee6f
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b699ee6f
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b699ee6f
Branch: refs/heads/datamap
Commit: b699ee6f72d280ff0969663f598769e66d8abdb9
Parents: bbb95ce
Author: bianhq <bi...@gmail.com>
Authored: Tue Jul 4 01:36:42 2017 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Tue Jul 4 11:52:19 2017 +0800
----------------------------------------------------------------------
integration/presto/README.md | 51 ++++--
.../presto/CarbondataConnectorFactory.java | 2 +-
.../presto/impl/CarbonLocalInputSplit.java | 13 +-
.../presto/impl/CarbonTableCacheModel.java | 2 +-
.../presto/impl/CarbonTableReader.java | 154 +++++++++++++++++--
5 files changed, 185 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b699ee6f/integration/presto/README.md
----------------------------------------------------------------------
diff --git a/integration/presto/README.md b/integration/presto/README.md
index 9935478..dc14cb0 100644
--- a/integration/presto/README.md
+++ b/integration/presto/README.md
@@ -20,14 +20,10 @@
Please follow the below steps to query carbondata in presto
### Config presto server
-* Download presto server 0.166 : https://repo1.maven.org/maven2/com/facebook/presto/presto-server/
-* Finish configuration as per https://prestodb.io/docs/current/installation/deployment.html
- for example:
+* Download presto server (0.166 is suggested and supported) : https://repo1.maven.org/maven2/com/facebook/presto/presto-server/
+* Finish presto configuration following https://prestodb.io/docs/current/installation/deployment.html.
+ A configuration example:
```
- carbondata.properties:
- connector.name=carbondata
- carbondata-store=/Users/apple/DEMO/presto_test/data
-
config.properties:
coordinator=true
node-scheduler.include-coordinator=true
@@ -57,30 +53,51 @@ Please follow the below steps to query carbondata in presto
node.id=ffffffff-ffff-ffff-ffff-ffffffffffff
node.data-dir=/Users/apple/DEMO/presto_test/data
```
-* config carbondata-connector for presto
+* Config carbondata-connector for presto
- First:compile carbondata-presto integration module
+ Firstly: Compile carbondata, including carbondata-presto integration module
```
$ git clone https://github.com/apache/carbondata
- $ cd carbondata/integration/presto
- $ mvn clean package
+ $ cd carbondata
+ $ mvn -DskipTests -P{spark-version} -Dspark.version={spark-version-number} -Dhadoop.version={hadoop-version-number} clean package
+ ```
+ Replace the spark and hadoop version with the version used in your cluster.
+ For example, if you are using Spark 2.1.0 and Hadoop 2.7.2, you would like to compile using:
+ ```
+ mvn -DskipTests -Pspark-2.1 -Dspark.version=2.1.0 -Dhadoop.version=2.7.2 clean package
+ ```
+
+ Secondly: Create a folder named 'carbondata' under $PRESTO_HOME$/plugin and
+ copy all jars from carbondata/integration/presto/target/carbondata-presto-x.x.x-SNAPSHOT
+ to $PRESTO_HOME$/plugin/carbondata
+
+ Thirdly: Create a carbondata.properties file under $PRESTO_HOME$/etc/catalog/ containing the following contents:
```
- Second:create one folder "carbondata" under ./presto-server-0.166/plugin
- Third:copy all jar from ./carbondata/integration/presto/target/carbondata-presto-x.x.x-SNAPSHOT
- to ./presto-server-0.166/plugin/carbondata
+ connector.name=carbondata
+ carbondata-store={schema-store-path}
+ ```
+ Replace the schema-store-path with the absolute path of the parent directory of the schema.
+ For example, if you have a schema named 'default' stored in hdfs://namenode:9000/test/carbondata/,
+ Then set carbondata-store=hdfs://namenode:9000/test/carbondata
+
+ If you updated the jar balls or configuration files, make sure you have dispatched them
+ to all the presto nodes and restarted the presto servers on the nodes. The updates will not take effect before restarting.
### Generate CarbonData file
-Please refer to quick start : https://github.com/apache/carbondata/blob/master/docs/quick-start-guide.md
+Please refer to quick start: https://github.com/apache/carbondata/blob/master/docs/quick-start-guide.md.
+Load data statement in Spark can be used to create carbondata tables. And then you can easily find the created
+carbondata files.
### Query carbondata in CLI of presto
-* Download presto-cli-0.166-executable.jar
+* Download the presto CLI client following: https://prestodb.io/docs/current/installation/cli.html
* Start CLI:
```
- $ ./presto-cli-0.166-executable.jar --server localhost:8086 --catalog carbondata --schema default
+ $ ./presto --server localhost:8086 --catalog carbondata --schema default
```
+ Replace the hostname, port and schema name with your own.
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b699ee6f/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataConnectorFactory.java
----------------------------------------------------------------------
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataConnectorFactory.java b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataConnectorFactory.java
index d97f19e..d557920 100755
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataConnectorFactory.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataConnectorFactory.java
@@ -71,7 +71,7 @@ public class CarbondataConnectorFactory implements ConnectorFactory {
ConnectorSplitManager splitManager = injector.getInstance(ConnectorSplitManager.class);
ConnectorRecordSetProvider connectorRecordSet =
injector.getInstance(ConnectorRecordSetProvider.class);
- ConnectorPageSourceProvider connectorPageSource = injector.getInstance(ConnectorPageSourceProvider.class);
+ ConnectorPageSourceProvider connectorPageSource = injector.getInstance(ConnectorPageSourceProvider.class);
return new CarbondataConnector(lifeCycleManager, metadata,
new ClassLoaderSafeConnectorSplitManager(splitManager, classLoader), connectorRecordSet,
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b699ee6f/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonLocalInputSplit.java
----------------------------------------------------------------------
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonLocalInputSplit.java b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonLocalInputSplit.java
index ba8d9b5..f0a8428 100755
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonLocalInputSplit.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonLocalInputSplit.java
@@ -17,19 +17,22 @@
package org.apache.carbondata.presto.impl;
-import java.util.List;
-
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
+import java.util.List;
+
+/**
+ * CarbonLocalInputSplit represents a block; it contains a set of blocklets.
+ */
public class CarbonLocalInputSplit {
private static final long serialVersionUID = 3520344046772190207L;
private String segmentId;
private String path;
- private long start;
- private long length;
- private List<String> locations;
+ private long start; // the start offset of the block in a carbondata file.
+ private long length; // the length of the block.
+ private List<String> locations; // locations of the different replicas of this block.
private short version;
/**
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b699ee6f/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableCacheModel.java
----------------------------------------------------------------------
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableCacheModel.java b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableCacheModel.java
index 45755d1..2a4db14 100755
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableCacheModel.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableCacheModel.java
@@ -23,7 +23,7 @@ import org.apache.carbondata.core.metadata.schema.table.TableInfo;
import org.apache.carbondata.core.util.path.CarbonTablePath;
/**
- * Caching Carbon meta(e.g. TableIdentifier, TablePath, TableInfo, CarbonTable) in Class CarbonTableReader
+ * Caching metadata of CarbonData (e.g. TableIdentifier, TablePath, TableInfo, CarbonTable) in class CarbonTableReader
* to speed up query
*/
public class CarbonTableCacheModel {
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b699ee6f/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
----------------------------------------------------------------------
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
index c328a64..54832f5 100755
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
@@ -72,15 +72,31 @@ import static java.util.Objects.requireNonNull;
* 2:FileFactory, (physic table file)
* 3:CarbonCommonFactory, (offer some )
* 4:DictionaryFactory, (parse dictionary util)
+ *
+ * Currently, it is mainly used to parse metadata of tables under
+ * the configured carbondata-store path and filter the relevant
+ * input splits with given query predicates.
*/
public class CarbonTableReader {
private CarbonTableConfig config;
+
+ /**
+ * The names of the tables under the schema (this.carbonFileList).
+ */
private List<SchemaTableName> tableList;
+
+ /**
+ * carbonFileList represents the store path of the schema, which is configured as carbondata-store
+ * in the CarbonData catalog file ($PRESTO_HOME$/etc/catalog/carbondata.properties).
+ */
private CarbonFile carbonFileList;
private FileFactory.FileType fileType;
- // A cache for Carbon reader
+ /**
+ * A cache for the Carbon reader; with this cache,
+ * the metadata of a table is read from the file system only once.
+ */
private ConcurrentHashMap<SchemaTableName, CarbonTableCacheModel> cc;
@Inject public CarbonTableReader(CarbonTableConfig config) {
@@ -88,9 +104,14 @@ public class CarbonTableReader {
this.cc = new ConcurrentHashMap<>();
}
- // for worker node to initialize carbon metastore
+ /**
+ * For a presto worker node to initialize the metadata cache of a table.
+ * @param table the schema-qualified name of the table.
+ * @return the cache model holding the metadata of the table.
+ */
public CarbonTableCacheModel getCarbonCache(SchemaTableName table) {
if (!cc.containsKey(table)) {
+ // if this table is not cached, try to read the metadata of the table and cache it.
try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(
FileFactory.class.getClassLoader())) {
if (carbonFileList == null) {
@@ -110,17 +131,26 @@ public class CarbonTableReader {
else return null;
}
+ /**
+ * Return the schema names under a schema store path (this.carbonFileList).
+ * @return the list of schema names.
+ */
public List<String> getSchemaNames() {
return updateSchemaList();
}
- // default PathFilter
+ // default PathFilter, accepts files in carbondata format (with .carbondata extension).
private static final PathFilter DefaultFilter = new PathFilter() {
@Override public boolean accept(Path path) {
return CarbonTablePath.isCarbonDataFile(path.getName());
}
};
+ /**
+ * Get the CarbonFile instance which represents the store path in the configuration, and assign it to
+ * this.carbonFileList.
+ * @return
+ */
public boolean updateCarbonFile() {
if (carbonFileList == null) {
fileType = FileFactory.getFileType(config.getStorePath());
@@ -133,6 +163,10 @@ public class CarbonTableReader {
return true;
}
+ /**
+ * Return the schema names under a schema store path (this.carbonFileList).
+ * @return the list of schema names.
+ */
public List<String> updateSchemaList() {
updateCarbonFile();
@@ -143,13 +177,23 @@ public class CarbonTableReader {
} else return ImmutableList.of();
}
+ /**
+ * Get the names of the tables in the given schema.
+ * @param schema name of the schema
+ * @return the set of table names in the schema.
+ */
public Set<String> getTableNames(String schema) {
requireNonNull(schema, "schema is null");
return updateTableList(schema);
}
- public Set<String> updateTableList(String dbName) {
- List<CarbonFile> schema = Stream.of(carbonFileList.listFiles()).filter(a -> dbName.equals(a.getName()))
+ /**
+ * Get the names of the tables in the given schema.
+ * @param schemaName name of the schema
+ * @return the set of table names in the schema.
+ */
+ public Set<String> updateTableList(String schemaName) {
+ List<CarbonFile> schema = Stream.of(carbonFileList.listFiles()).filter(a -> schemaName.equals(a.getName()))
.collect(Collectors.toList());
if (schema.size() > 0) {
return Stream.of((schema.get(0)).listFiles()).map(a -> a.getName())
@@ -157,6 +201,11 @@ public class CarbonTableReader {
} else return ImmutableSet.of();
}
+ /**
+ * Get the CarbonTable instance of the given table.
+ * @param schemaTableName name of the given table.
+ * @return the CarbonTable instance of the given table.
+ */
public CarbonTable getTable(SchemaTableName schemaTableName) {
try {
updateSchemaTables();
@@ -170,6 +219,11 @@ public class CarbonTableReader {
return table;
}
+ /**
+ * Find all the tables under the schema store path (this.carbonFileList)
+ * and cache all the table names in this.tableList. Notice that whenever this method
+ * is called, it clears this.tableList and repopulates the list by reading the files.
+ */
public void updateSchemaTables() {
// update logic determine later
if (carbonFileList == null) {
@@ -185,6 +239,12 @@ public class CarbonTableReader {
}
}
+ /**
+ * Find the table with the given name and build a CarbonTable instance for it.
+ * This method should be called after this.updateSchemaTables().
+ * @param schemaTableName name of the given table.
+ * @return the CarbonTable instance of the table, if found.
+ */
private CarbonTable loadTableMetadata(SchemaTableName schemaTableName) {
for (SchemaTableName table : tableList) {
if (!table.equals(schemaTableName)) continue;
@@ -195,7 +255,9 @@ public class CarbonTableReader {
}
/**
- * parse carbon metadata into cc(CarbonTableReader cache)
+ * Read the metadata of the given table and cache it in this.cc (CarbonTableReader cache).
+ * @param table name of the given table.
+ * @return the CarbonTable instance which contains all the needed metadata for a table.
*/
public CarbonTable parseCarbonMetadata(SchemaTableName table) {
CarbonTable result = null;
@@ -203,17 +265,25 @@ public class CarbonTableReader {
CarbonTableCacheModel cache = cc.getOrDefault(table, new CarbonTableCacheModel());
if (cache.isValid()) return cache.carbonTable;
- //Step1: get table meta path, load carbon table param
+ // If table is not previously cached, then:
+
+ // Step 1: get store path of the table and cache it.
String storePath = config.getStorePath();
+ // create table identifier. the table id is randomly generated.
cache.carbonTableIdentifier =
new CarbonTableIdentifier(table.getSchemaName(), table.getTableName(),
UUID.randomUUID().toString());
+ // get the store path of the table.
cache.carbonTablePath =
PathFactory.getInstance().getCarbonTablePath(storePath, cache.carbonTableIdentifier);
+ // cache the table
cc.put(table, cache);
- //Step2: check file existed? read schema file
+ //Step 2: read the metadata (tableInfo) of the table.
ThriftReader.TBaseCreator createTBase = new ThriftReader.TBaseCreator() {
+ // TBase is used to read and write thrift objects.
+ // TableInfo is a kind of TBase used to read and write table information.
+ // TableInfo is generated by thrift, see schema.thrift under format/src/main/thrift for details.
public TBase create() {
return new org.apache.carbondata.format.TableInfo();
}
@@ -225,14 +295,16 @@ public class CarbonTableReader {
(org.apache.carbondata.format.TableInfo) thriftReader.read();
thriftReader.close();
- // Step3: Transform Format Level TableInfo to Code Level TableInfo
+ // Step 3: convert format level TableInfo to code level TableInfo
SchemaConverter schemaConverter = new ThriftWrapperSchemaConverterImpl();
+ // wrapperTableInfo is the code level information of a table in carbondata core, different from the Thrift TableInfo.
TableInfo wrapperTableInfo = schemaConverter
.fromExternalToWrapperTableInfo(tableInfo, table.getSchemaName(), table.getTableName(),
storePath);
wrapperTableInfo.setMetaDataFilepath(
CarbonTablePath.getFolderContainingFile(cache.carbonTablePath.getSchemaFilePath()));
- // Step4: Load metadata info into CarbonMetadata
+
+ // Step 4: Load metadata info into CarbonMetadata
CarbonMetadata.getInstance().loadTableMetadata(wrapperTableInfo);
cache.tableInfo = wrapperTableInfo;
@@ -246,6 +318,13 @@ public class CarbonTableReader {
return result;
}
+ /**
+ * Apply filters to the table and get valid input splits of the table.
+ * @param tableCacheModel the table
+ * @param filters the filters
+ * @return the input splits that survive the filters.
+ * @throws Exception
+ */
public List<CarbonLocalInputSplit> getInputSplits2(CarbonTableCacheModel tableCacheModel,
Expression filters) throws Exception {
@@ -332,7 +411,16 @@ public class CarbonTableReader {
}
/**
- * get data blocks of given segment
+ * Get all the data blocks of a given segment.
+ * @param filterExpressionProcessor
+ * @param absoluteTableIdentifier
+ * @param tablePath
+ * @param resolver
+ * @param segmentId
+ * @param cacheClient
+ * @param updateStatusManager
+ * @return
+ * @throws IOException
*/
private List<DataRefNode> getDataBlocksOfSegment(
FilterExpressionProcessor filterExpressionProcessor,
@@ -380,6 +468,16 @@ public class CarbonTableReader {
return false;
}
+ /**
+ * Build and load the B-trees of the segment.
+ * @param absoluteTableIdentifier
+ * @param tablePath
+ * @param segmentId
+ * @param cacheClient
+ * @param updateStatusManager
+ * @return
+ * @throws IOException
+ */
private Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> getSegmentAbstractIndexs(/*JobContext job,*/
AbsoluteTableIdentifier absoluteTableIdentifier, CarbonTablePath tablePath, String segmentId,
CacheClient cacheClient, SegmentUpdateStatusManager updateStatusManager) throws IOException {
@@ -491,6 +589,13 @@ public class CarbonTableReader {
return false;
}
+ /**
+ * Get the input splits of a set of carbondata files.
+ * @param fileStatusList the file statuses of the set of carbondata files.
+ * @param targetSystem hdfs FileSystem
+ * @return the input splits of the given files.
+ * @throws IOException
+ */
private List<InputSplit> getSplit(List<FileStatus> fileStatusList, FileSystem targetSystem)
throws IOException {
@@ -501,6 +606,7 @@ public class CarbonTableReader {
while (true) {
while (true) {
while (split.hasNext()) {
+ // file is a carbondata file
FileStatus file = (FileStatus) split.next();
Path path = file.getPath();
long length = file.getLen();
@@ -520,7 +626,7 @@ public class CarbonTableReader {
int blkIndex;
for (
bytesRemaining = length;
- (double) bytesRemaining / (double) splitSize > 1.1D;
+ (double) bytesRemaining / (double) splitSize > 1.1D; // as long as more than one split remains.
bytesRemaining -= splitSize) {
blkIndex = this.getBlockIndex(blkLocations, length - bytesRemaining);
splits.add(this.makeSplit(path, length - bytesRemaining, splitSize,
@@ -552,6 +658,15 @@ public class CarbonTableReader {
return new String[] { "0" };
}
+ /**
+ * Get all file statuses of the carbondata files with a segmentId in segmentsToConsider
+ * under the tablePath, and add them to the result.
+ * @param segmentsToConsider
+ * @param tablePath
+ * @param result
+ * @return the FileSystem instance used in this function.
+ * @throws IOException
+ */
private FileSystem getFileStatusOfSegments(String[] segmentsToConsider, CarbonTablePath tablePath,
List<FileStatus> result) throws IOException {
String[] partitionsToConsider = getValidPartitions();
@@ -584,6 +699,7 @@ public class CarbonTableReader {
LocatedFileStatus stat = iter.next();
if (DefaultFilter.accept(stat.getPath())) {
if (stat.isDirectory()) {
+ // DefaultFilter accepts carbondata files.
addInputPathRecursively(result, fs, stat.getPath(), DefaultFilter);
} else {
result.add(stat);
@@ -598,6 +714,15 @@ public class CarbonTableReader {
return fs;
}
+ /**
+ * Get the FileStatus of all carbondata files under the path recursively,
+ * and add the file statuses into the result
+ * @param result
+ * @param fs
+ * @param path
+ * @param inputFilter the filter used to determine whether a path is a carbondata file
+ * @throws IOException
+ */
protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
PathFilter inputFilter) throws IOException {
RemoteIterator iter = fs.listLocatedStatus(path);
@@ -616,7 +741,10 @@ public class CarbonTableReader {
}
/**
- * get data blocks of given btree
+ * Get the data blocks of a B-tree; the root node of the B-tree is abstractIndex.dataRefNode.
+ * BTreeNode is a subclass of DataRefNode.
+ * @param abstractIndex
+ * @return the data blocks of the B-tree.
*/
private List<DataRefNode> getDataBlocksOfIndex(AbstractIndex abstractIndex) {
List<DataRefNode> blocks = new LinkedList<DataRefNode>();
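Taken together, the methods documented above form the connector's metadata path: schema discovery, table discovery, then per-table caching. A minimal sketch of how they compose, assuming an already-constructed CarbonTableConfig (the wiring below is illustrative, not part of the patch):
```
import com.facebook.presto.spi.SchemaTableName;

// Illustrative only: enumerate schemas and tables under the configured
// carbondata-store, then warm the per-table metadata cache.
void walkStore(CarbonTableConfig config) {
  CarbonTableReader reader = new CarbonTableReader(config);
  for (String schemaName : reader.getSchemaNames()) {
    for (String tableName : reader.getTableNames(schemaName)) {
      SchemaTableName name = new SchemaTableName(schemaName, tableName);
      // first call parses the schema file; later calls hit this.cc
      CarbonTableCacheModel cached = reader.getCarbonCache(name);
    }
  }
}
```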
[49/50] [abbrv] carbondata git commit: [CARBONDATA-1232] Datamap
implementation for Blocklet
Posted by ja...@apache.org.
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRow.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRow.java b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRow.java
new file mode 100644
index 0000000..defe766
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRow.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore.row;
+
+import org.apache.carbondata.core.indexstore.schema.DataMapSchema;
+
+/**
+ * It is just a normal row to store data. Implementations can be safe (on-heap) or unsafe (off-heap).
+ * TODO: move this class to a global row and use it across loading, after DataType is changed to a class
+ */
+public abstract class DataMapRow {
+
+ protected DataMapSchema[] schemas;
+
+ public DataMapRow(DataMapSchema[] schemas) {
+ this.schemas = schemas;
+ }
+
+ public abstract byte[] getByteArray(int ordinal);
+
+ public abstract DataMapRow getRow(int ordinal);
+
+ public abstract void setRow(DataMapRow row, int ordinal);
+
+ public abstract void setByteArray(byte[] byteArray, int ordinal);
+
+ public abstract int getInt(int ordinal);
+
+ public abstract void setInt(int value, int ordinal);
+
+ public abstract void setByte(byte value, int ordinal);
+
+ public abstract byte getByte(int ordinal);
+
+ public abstract void setShort(short value, int ordinal);
+
+ public abstract short getShort(int ordinal);
+
+ public abstract void setLong(long value, int ordinal);
+
+ public abstract long getLong(int ordinal);
+
+ public abstract void setFloat(float value, int ordinal);
+
+ public abstract float getFloat(int ordinal);
+
+ public abstract void setDouble(double value, int ordinal);
+
+ public abstract double getDouble(int ordinal);
+
+ public int getTotalSizeInBytes() {
+ int len = 0;
+ for (int i = 0; i < schemas.length; i++) {
+ len += getSizeInBytes(i);
+ }
+ return len;
+ }
+
+ public int getSizeInBytes(int ordinal) {
+ switch (schemas[ordinal].getSchemaType()) {
+ case FIXED:
+ return schemas[ordinal].getLength();
+ case VARIABLE:
+ return getByteArray(ordinal).length + 2;
+ case STRUCT:
+ return getRow(ordinal).getTotalSizeInBytes();
+ default:
+ throw new UnsupportedOperationException("wrong type");
+ }
+ }
+
+ public int getColumnCount() {
+ return schemas.length;
+ }
+}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java
new file mode 100644
index 0000000..adec346
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore.row;
+
+import org.apache.carbondata.core.indexstore.schema.DataMapSchema;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+
+/**
+ * Safe (heap-based) implementation of the data map row.
+ */
+public class DataMapRowImpl extends DataMapRow {
+
+ private Object[] data;
+
+ public DataMapRowImpl(DataMapSchema[] schemas) {
+ super(schemas);
+ this.data = new Object[schemas.length];
+ }
+
+ @Override public byte[] getByteArray(int ordinal) {
+ return (byte[]) data[ordinal];
+ }
+
+ @Override public DataMapRow getRow(int ordinal) {
+ return (DataMapRow) data[ordinal];
+ }
+
+ @Override public void setByteArray(byte[] byteArray, int ordinal) {
+ assert (schemas[ordinal].getDataType() == DataType.BYTE_ARRAY);
+ data[ordinal] = byteArray;
+ }
+
+ @Override public int getInt(int ordinal) {
+ return (Integer) data[ordinal];
+ }
+
+ @Override public void setInt(int value, int ordinal) {
+ assert (schemas[ordinal].getDataType() == DataType.INT);
+ data[ordinal] = value;
+ }
+
+ @Override public void setByte(byte value, int ordinal) {
+ assert (schemas[ordinal].getDataType() == DataType.BYTE);
+ data[ordinal] = value;
+ }
+
+ @Override public byte getByte(int ordinal) {
+ return (Byte) data[ordinal];
+ }
+
+ @Override public void setShort(short value, int ordinal) {
+ assert (schemas[ordinal].getDataType() == DataType.SHORT);
+ data[ordinal] = value;
+ }
+
+ @Override public short getShort(int ordinal) {
+ return (Short) data[ordinal];
+ }
+
+ @Override public void setLong(long value, int ordinal) {
+ assert (schemas[ordinal].getDataType() == DataType.LONG);
+ data[ordinal] = value;
+ }
+
+ @Override public long getLong(int ordinal) {
+ return (Long) data[ordinal];
+ }
+
+ @Override public void setFloat(float value, int ordinal) {
+ assert (schemas[ordinal].getDataType() == DataType.FLOAT);
+ data[ordinal] = value;
+ }
+
+ @Override public float getFloat(int ordinal) {
+ return (Float) data[ordinal];
+ }
+
+ @Override public void setDouble(double value, int ordinal) {
+ assert (schemas[ordinal].getDataType() == DataType.DOUBLE);
+ data[ordinal] = value;
+ }
+
+ @Override public void setRow(DataMapRow row, int ordinal) {
+ assert (schemas[ordinal].getDataType() == DataType.STRUCT);
+ data[ordinal] = row;
+ }
+
+ @Override public double getDouble(int ordinal) {
+ return (Double) data[ordinal];
+ }
+
+}
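A short usage sketch of this heap-based row, using the schema classes from DataMapSchema.java later in this commit (the values are illustrative):
```
import org.apache.carbondata.core.indexstore.row.DataMapRowImpl;
import org.apache.carbondata.core.indexstore.schema.DataMapSchema;
import org.apache.carbondata.core.metadata.datatype.DataType;

// Two columns: a fixed-size INT and a variable-size byte array.
DataMapSchema[] schemas = new DataMapSchema[] {
    new DataMapSchema.FixedDataMapSchema(DataType.INT),
    new DataMapSchema.VariableDataMapSchema(DataType.BYTE_ARRAY)
};
DataMapRowImpl row = new DataMapRowImpl(schemas);
row.setInt(5, 0);                          // ordinal 0, asserts DataType.INT
row.setByteArray(new byte[] {1, 2, 3}, 1); // ordinal 1, asserts DataType.BYTE_ARRAY
// FIXED columns contribute their type size; VARIABLE columns contribute
// the payload length plus a 2-byte length prefix (see getSizeInBytes).
int totalSize = row.getTotalSizeInBytes();
```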
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java b/core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java
new file mode 100644
index 0000000..ef78514
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.core.indexstore.row;
+
+import org.apache.carbondata.core.indexstore.schema.DataMapSchema;
+import org.apache.carbondata.core.memory.MemoryBlock;
+
+import static org.apache.carbondata.core.memory.CarbonUnsafe.BYTE_ARRAY_OFFSET;
+import static org.apache.carbondata.core.memory.CarbonUnsafe.unsafe;
+
+/**
+ * Unsafe implementation of data map row.
+ */
+public class UnsafeDataMapRow extends DataMapRow {
+
+ private MemoryBlock block;
+
+ private int pointer;
+
+ public UnsafeDataMapRow(DataMapSchema[] schemas, MemoryBlock block, int pointer) {
+ super(schemas);
+ this.block = block;
+ this.pointer = pointer;
+ }
+
+ @Override public byte[] getByteArray(int ordinal) {
+ int length;
+ int position = getPosition(ordinal);
+ switch (schemas[ordinal].getSchemaType()) {
+ case VARIABLE:
+ length = unsafe.getShort(block.getBaseObject(), block.getBaseOffset() + pointer + position);
+ position += 2;
+ break;
+ default:
+ length = schemas[ordinal].getLength();
+ }
+ byte[] data = new byte[length];
+ unsafe.copyMemory(block.getBaseObject(), block.getBaseOffset() + pointer + position, data,
+ BYTE_ARRAY_OFFSET, data.length);
+ return data;
+ }
+
+ @Override public DataMapRow getRow(int ordinal) {
+ DataMapSchema[] childSchemas =
+ ((DataMapSchema.StructDataMapSchema) schemas[ordinal]).getChildSchemas();
+ return new UnsafeDataMapRow(childSchemas, block, pointer + getPosition(ordinal));
+ }
+
+ @Override public void setByteArray(byte[] byteArray, int ordinal) {
+ throw new UnsupportedOperationException("Not supported to set on unsafe row");
+ }
+
+ @Override public int getInt(int ordinal) {
+ return unsafe
+ .getInt(block.getBaseObject(), block.getBaseOffset() + pointer + getPosition(ordinal));
+ }
+
+ @Override public void setInt(int value, int ordinal) {
+ throw new UnsupportedOperationException("Not supported to set on unsafe row");
+ }
+
+ @Override public void setByte(byte value, int ordinal) {
+ throw new UnsupportedOperationException("Not supported to set on unsafe row");
+ }
+
+ @Override public byte getByte(int ordinal) {
+ return unsafe
+ .getByte(block.getBaseObject(), block.getBaseOffset() + pointer + getPosition(ordinal));
+ }
+
+ @Override public void setShort(short value, int ordinal) {
+ throw new UnsupportedOperationException("Not supported to set on unsafe row");
+ }
+
+ @Override public short getShort(int ordinal) {
+ return unsafe
+ .getShort(block.getBaseObject(), block.getBaseOffset() + pointer + getPosition(ordinal));
+ }
+
+ @Override public void setLong(long value, int ordinal) {
+ throw new UnsupportedOperationException("Not supported to set on unsafe row");
+ }
+
+ @Override public long getLong(int ordinal) {
+ return unsafe
+ .getLong(block.getBaseObject(), block.getBaseOffset() + pointer + getPosition(ordinal));
+ }
+
+ @Override public void setFloat(float value, int ordinal) {
+ throw new UnsupportedOperationException("Not supported to set on unsafe row");
+ }
+
+ @Override public float getFloat(int ordinal) {
+ return unsafe
+ .getFloat(block.getBaseObject(), block.getBaseOffset() + pointer + getPosition(ordinal));
+ }
+
+ @Override public void setDouble(double value, int ordinal) {
+ throw new UnsupportedOperationException("Not supported to set on unsafe row");
+ }
+
+ @Override public double getDouble(int ordinal) {
+ return unsafe
+ .getDouble(block.getBaseObject(), block.getBaseOffset() + pointer + getPosition(ordinal));
+ }
+
+ @Override public void setRow(DataMapRow row, int ordinal) {
+ throw new UnsupportedOperationException("Not supported to set on unsafe row");
+ }
+
+ private int getPosition(int ordinal) {
+ int position = 0;
+ for (int i = 0; i < ordinal; i++) {
+ position += getSizeInBytes(i);
+ }
+ return position;
+ }
+}
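Reads on the unsafe row are pure pointer arithmetic: getPosition sums the serialized size of every column before the requested ordinal, and the value is fetched directly from the MemoryBlock at pointer + position. A small layout sketch using only the public schema API (sizes assume the usual 4-byte INT and 8-byte LONG):
```
// For {FIXED INT, FIXED LONG, VARIABLE}, ordinal 2 starts at 4 + 8 = 12
// bytes past the row pointer; a variable column is then stored as a
// 2-byte length followed by the payload (see getByteArray above).
DataMapSchema[] schemas = new DataMapSchema[] {
    new DataMapSchema.FixedDataMapSchema(DataType.INT),   // 4 bytes
    new DataMapSchema.FixedDataMapSchema(DataType.LONG),  // 8 bytes
    new DataMapSchema.VariableDataMapSchema(DataType.BYTE_ARRAY)
};
int position = 0;
for (int i = 0; i < 2; i++) {
  position += schemas[i].getLength(); // fixed columns: the data type size
}
// position == 12, matching UnsafeDataMapRow.getPosition(2) for this row
// (for variable columns, getSizeInBytes must read the stored length).
```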
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/schema/DataMapSchema.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/schema/DataMapSchema.java b/core/src/main/java/org/apache/carbondata/core/indexstore/schema/DataMapSchema.java
new file mode 100644
index 0000000..80c68ac
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/schema/DataMapSchema.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore.schema;
+
+import org.apache.carbondata.core.metadata.datatype.DataType;
+
+/**
+ * Schema of a column in a data map row. There are three schema types right now: fixed, variable and struct.
+ */
+public abstract class DataMapSchema {
+
+ protected DataType dataType;
+
+ public DataMapSchema(DataType dataType) {
+ this.dataType = dataType;
+ }
+
+ /**
+ * The data type of the column.
+ *
+ * @return the data type.
+ */
+ public DataType getDataType() {
+ return dataType;
+ }
+
+ /**
+ * Gives the configured length in case of a fixed schema, otherwise returns the size of the data type.
+ *
+ * @return the length in bytes.
+ */
+ public abstract int getLength();
+
+ /**
+ * The schema type: fixed, variable or struct.
+ * @return the schema type.
+ */
+ public abstract DataMapSchemaType getSchemaType();
+
+ /*
+ * It always has a fixed length that cannot be updated later.
+ * Usage examples: all primitive types like short, int etc.
+ */
+ public static class FixedDataMapSchema extends DataMapSchema {
+
+ private int length;
+
+ public FixedDataMapSchema(DataType dataType) {
+ super(dataType);
+ }
+
+ public FixedDataMapSchema(DataType dataType, int length) {
+ super(dataType);
+ this.length = length;
+ }
+
+ @Override public int getLength() {
+ if (length == 0) {
+ return dataType.getSizeInBytes();
+ } else {
+ return length;
+ }
+ }
+
+ @Override public DataMapSchemaType getSchemaType() {
+ return DataMapSchemaType.FIXED;
+ }
+ }
+
+ public static class VariableDataMapSchema extends DataMapSchema {
+
+ public VariableDataMapSchema(DataType dataType) {
+ super(dataType);
+ }
+
+ @Override public int getLength() {
+ return dataType.getSizeInBytes();
+ }
+
+ @Override public DataMapSchemaType getSchemaType() {
+ return DataMapSchemaType.VARIABLE;
+ }
+ }
+
+ public static class StructDataMapSchema extends DataMapSchema {
+
+ private DataMapSchema[] childSchemas;
+
+ public StructDataMapSchema(DataType dataType, DataMapSchema[] childSchemas) {
+ super(dataType);
+ this.childSchemas = childSchemas;
+ }
+
+ @Override public int getLength() {
+ return dataType.getSizeInBytes();
+ }
+
+ public DataMapSchema[] getChildSchemas() {
+ return childSchemas;
+ }
+
+ @Override public DataMapSchemaType getSchemaType() {
+ return DataMapSchemaType.STRUCT;
+ }
+ }
+
+ public enum DataMapSchemaType {
+ FIXED, VARIABLE, STRUCT
+ }
+}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/schema/FilterType.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/schema/FilterType.java b/core/src/main/java/org/apache/carbondata/core/indexstore/schema/FilterType.java
new file mode 100644
index 0000000..9d77010
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/schema/FilterType.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore.schema;
+
+/**
+ * Types of filters in a select query
+ */
+public enum FilterType {
+ EQUALTO, GREATER_THAN, LESS_THAN, GREATER_THAN_EQUAL, LESS_THAN_EQUAL, LIKE
+}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java
index bfa9d7e..f81f805 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java
@@ -17,16 +17,22 @@
package org.apache.carbondata.core.metadata.blocklet;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
import java.io.Serializable;
+import java.util.ArrayList;
import java.util.List;
import org.apache.carbondata.core.metadata.blocklet.datachunk.DataChunk;
import org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex;
+import org.apache.hadoop.io.Writable;
+
/**
* class to store the information about the blocklet
*/
-public class BlockletInfo implements Serializable {
+public class BlockletInfo implements Serializable, Writable {
/**
* serialization id
@@ -189,4 +195,49 @@ public class BlockletInfo implements Serializable {
this.numberOfPages = numberOfPages;
}
+ @Override public void write(DataOutput output) throws IOException {
+ output.writeLong(dimensionOffset);
+ output.writeLong(measureOffsets);
+ int dsize = dimensionChunkOffsets != null ? dimensionChunkOffsets.size() : 0;
+ output.writeShort(dsize);
+ for (int i = 0; i < dsize; i++) {
+ output.writeLong(dimensionChunkOffsets.get(i));
+ }
+ for (int i = 0; i < dsize; i++) {
+ output.writeInt(dimensionChunksLength.get(i));
+ }
+ int mSize = measureChunkOffsets != null ? measureChunkOffsets.size() : 0;
+ output.writeShort(mSize);
+ for (int i = 0; i < mSize; i++) {
+ output.writeLong(measureChunkOffsets.get(i));
+ }
+ for (int i = 0; i < mSize; i++) {
+ output.writeInt(measureChunksLength.get(i));
+ }
+ }
+
+ @Override public void readFields(DataInput input) throws IOException {
+ dimensionOffset = input.readLong();
+ measureOffsets = input.readLong();
+ short dimensionChunkOffsetsSize = input.readShort();
+ dimensionChunkOffsets = new ArrayList<>(dimensionChunkOffsetsSize);
+ for (int i = 0; i < dimensionChunkOffsetsSize; i++) {
+ dimensionChunkOffsets.add(input.readLong());
+ }
+ dimensionChunksLength = new ArrayList<>(dimensionChunkOffsetsSize);
+ for (int i = 0; i < dimensionChunkOffsetsSize; i++) {
+ dimensionChunksLength.add(input.readInt());
+ }
+
+ short measureChunkOffsetsSize = input.readShort();
+ measureChunkOffsets = new ArrayList<>(measureChunkOffsetsSize);
+ for (int i = 0; i < measureChunkOffsetsSize; i++) {
+ measureChunkOffsets.add(input.readLong());
+ }
+ measureChunksLength = new ArrayList<>(measureChunkOffsetsSize);
+ for (int i = 0; i < measureChunkOffsetsSize; i++) {
+ measureChunksLength.add(input.readInt());
+ }
+
+ }
}
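With the Writable methods above, a BlockletInfo can be shipped inside a serialized input split and rebuilt on the executor side. A hedged round-trip sketch; it assumes the no-arg constructor and that the existing setters populated the offset/length lists, and only the fields serialized in write() survive the trip:
```
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;

// Serialize a BlockletInfo and rebuild an equivalent instance.
BlockletInfo original = new BlockletInfo(); // assume offsets/lengths were set
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
original.write(new DataOutputStream(bytes));

BlockletInfo copy = new BlockletInfo();
copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
// copy now holds dimensionOffset, measureOffsets and the chunk
// offset/length lists written above, but not e.g. the BlockletIndex.
```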
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/metadata/index/BlockIndexInfo.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/index/BlockIndexInfo.java b/core/src/main/java/org/apache/carbondata/core/metadata/index/BlockIndexInfo.java
index cd86a07..ae99ed8 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/index/BlockIndexInfo.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/index/BlockIndexInfo.java
@@ -16,6 +16,7 @@
*/
package org.apache.carbondata.core.metadata.index;
+import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
import org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex;
/**
@@ -45,6 +46,11 @@ public class BlockIndexInfo {
private BlockletIndex blockletIndex;
/**
+ * to store blocklet info like offsets and lengths of each column.
+ */
+ private BlockletInfo blockletInfo;
+
+ /**
* Constructor
*
* @param numberOfRows number of rows
@@ -61,6 +67,20 @@ public class BlockIndexInfo {
}
/**
+ * Constructor that additionally carries the blocklet info.
+ * @param numberOfRows
+ * @param fileName
+ * @param offset
+ * @param blockletIndex
+ * @param blockletInfo
+ */
+ public BlockIndexInfo(long numberOfRows, String fileName, long offset,
+ BlockletIndex blockletIndex, BlockletInfo blockletInfo) {
+ this(numberOfRows, fileName, offset, blockletIndex);
+ this.blockletInfo = blockletInfo;
+ }
+
+ /**
* @return the numberOfRows
*/
public long getNumberOfRows() {
@@ -87,4 +107,11 @@ public class BlockIndexInfo {
public BlockletIndex getBlockletIndex() {
return blockletIndex;
}
+
+ /**
+ * @return BlockletInfo
+ */
+ public BlockletInfo getBlockletInfo() {
+ return blockletInfo;
+ }
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
index ff54673..e0ee5bb 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java
@@ -21,8 +21,10 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
+import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
@@ -41,6 +43,7 @@ import org.apache.carbondata.core.datastore.block.AbstractIndex;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.datastore.block.TableBlockInfo;
import org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier;
+import org.apache.carbondata.core.indexstore.blockletindex.IndexWrapper;
import org.apache.carbondata.core.keygenerator.KeyGenException;
import org.apache.carbondata.core.keygenerator.KeyGenerator;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
@@ -116,23 +119,40 @@ public abstract class AbstractQueryExecutor<E> implements QueryExecutor<E> {
// so block will be loaded in sorted order this will be required for
// query execution
Collections.sort(queryModel.getTableBlockInfos());
- // get the table blocks
- CacheProvider cacheProvider = CacheProvider.getInstance();
- BlockIndexStore<TableBlockUniqueIdentifier, AbstractIndex> cache =
- (BlockIndexStore) cacheProvider
- .createCache(CacheType.EXECUTOR_BTREE, queryModel.getTable().getStorePath());
- // remove the invalid table blocks, block which is deleted or compacted
- cache.removeTableBlocks(queryModel.getInvalidSegmentIds(),
- queryModel.getAbsoluteTableIdentifier());
- List<TableBlockUniqueIdentifier> tableBlockUniqueIdentifiers =
- prepareTableBlockUniqueIdentifier(queryModel.getTableBlockInfos(),
- queryModel.getAbsoluteTableIdentifier());
- cache.removeTableBlocksIfHorizontalCompactionDone(queryModel);
- queryProperties.dataBlocks = cache.getAll(tableBlockUniqueIdentifiers);
- queryStatistic
- .addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_EXECUTOR, System.currentTimeMillis());
- queryProperties.queryStatisticsRecorder.recordStatistics(queryStatistic);
+ if (queryModel.getTableBlockInfos().get(0).getDetailInfo() != null) {
+ List<AbstractIndex> indexList = new ArrayList<>();
+ Map<String, List<TableBlockInfo>> listMap = new LinkedHashMap<>();
+ for (TableBlockInfo blockInfo: queryModel.getTableBlockInfos()) {
+ List<TableBlockInfo> tableBlockInfos = listMap.get(blockInfo.getFilePath());
+ if (tableBlockInfos == null) {
+ tableBlockInfos = new ArrayList<>();
+ listMap.put(blockInfo.getFilePath(), tableBlockInfos);
+ }
+ tableBlockInfos.add(blockInfo);
+ }
+ for (List<TableBlockInfo> tableBlockInfos: listMap.values()) {
+ indexList.add(new IndexWrapper(tableBlockInfos));
+ }
+ queryProperties.dataBlocks = indexList;
+ } else {
+ // get the table blocks
+ CacheProvider cacheProvider = CacheProvider.getInstance();
+ BlockIndexStore<TableBlockUniqueIdentifier, AbstractIndex> cache =
+ (BlockIndexStore) cacheProvider
+ .createCache(CacheType.EXECUTOR_BTREE, queryModel.getTable().getStorePath());
+ // remove the invalid table blocks, block which is deleted or compacted
+ cache.removeTableBlocks(queryModel.getInvalidSegmentIds(),
+ queryModel.getAbsoluteTableIdentifier());
+ List<TableBlockUniqueIdentifier> tableBlockUniqueIdentifiers =
+ prepareTableBlockUniqueIdentifier(queryModel.getTableBlockInfos(),
+ queryModel.getAbsoluteTableIdentifier());
+ cache.removeTableBlocksIfHorizontalCompactionDone(queryModel);
+ queryProperties.dataBlocks = cache.getAll(tableBlockUniqueIdentifiers);
+ queryStatistic
+ .addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_EXECUTOR, System.currentTimeMillis());
+ queryProperties.queryStatisticsRecorder.recordStatistics(queryStatistic);
+ }
// calculating the total number of aggeragted columns
int aggTypeCount = queryModel.getQueryMeasures().size();
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
index 8704496..a874835 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java
@@ -156,7 +156,7 @@ public class IncludeFilterExecuterImpl implements FilterExecuter {
int columnIndex = dimColumnEvaluatorInfo.getColumnIndex();
int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping().get(columnIndex);
- boolean isScanRequired =
+ boolean isScanRequired = blockIndex >= blkMaxVal.length ||
isScanRequired(blkMaxVal[blockIndex], blkMinVal[blockIndex], filterValues);
if (isScanRequired) {
bitSet.set(0);
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java
index 6823531..c2e077e 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java
@@ -287,7 +287,7 @@ public class RangeValueFilterExecuterImpl extends ValueBasedFilterExecuterImpl {
BitSet bitSet = new BitSet(1);
byte[][] filterValues = this.filterRangesValues;
int columnIndex = this.dimColEvaluatorInfo.getColumnIndex();
- boolean isScanRequired =
+ boolean isScanRequired = columnIndex >= blockMinValue.length ||
isScanRequired(blockMinValue[columnIndex], blockMaxValue[columnIndex], filterValues);
if (isScanRequired) {
bitSet.set(0);
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
index be82be7..73352cb 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java
@@ -79,7 +79,7 @@ public class RowLevelRangeGrtThanFiterExecuterImpl extends RowLevelFilterExecute
@Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) {
BitSet bitSet = new BitSet(1);
- boolean isScanRequired =
+ boolean isScanRequired = dimensionBlocksIndex[0] >= blockMaxValue.length ||
isScanRequired(blockMaxValue[dimensionBlocksIndex[0]], filterRangeValues);
if (isScanRequired) {
bitSet.set(0);
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
index 53da6c5..6e8e188 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java
@@ -81,7 +81,7 @@ public class RowLevelRangeGrtrThanEquaToFilterExecuterImpl extends RowLevelFilte
@Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) {
BitSet bitSet = new BitSet(1);
- boolean isScanRequired =
+ boolean isScanRequired = dimensionBlocksIndex[0] >= blockMaxValue.length ||
isScanRequired(blockMaxValue[dimensionBlocksIndex[0]], filterRangeValues);
if (isScanRequired) {
bitSet.set(0);
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
index d694960..d6f7c86 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java
@@ -81,7 +81,7 @@ public class RowLevelRangeLessThanEqualFilterExecuterImpl extends RowLevelFilter
@Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) {
BitSet bitSet = new BitSet(1);
- boolean isScanRequired =
+ boolean isScanRequired = dimensionBlocksIndex[0] >= blockMaxValue.length ||
isScanRequired(blockMinValue[dimensionBlocksIndex[0]], filterRangeValues);
if (isScanRequired) {
bitSet.set(0);
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
index b3dd921..597ba52 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java
@@ -82,7 +82,7 @@ public class RowLevelRangeLessThanFiterExecuterImpl extends RowLevelFilterExecut
@Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) {
BitSet bitSet = new BitSet(1);
- boolean isScanRequired =
+ boolean isScanRequired = dimensionBlocksIndex[0] >= blockMaxValue.length ||
isScanRequired(blockMinValue[dimensionBlocksIndex[0]], filterRangeValues);
if (isScanRequired) {
bitSet.set(0);
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/scan/processor/AbstractDataBlockIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/processor/AbstractDataBlockIterator.java b/core/src/main/java/org/apache/carbondata/core/scan/processor/AbstractDataBlockIterator.java
index fdb5483..ff4f5dd 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/processor/AbstractDataBlockIterator.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/processor/AbstractDataBlockIterator.java
@@ -165,6 +165,9 @@ public abstract class AbstractDataBlockIterator extends CarbonIterator<List<Obje
new BlocksChunkHolder(blockExecutionInfo.getTotalNumberDimensionBlock(),
blockExecutionInfo.getTotalNumberOfMeasureBlock(), fileReader);
blocksChunkHolder.setDataBlock(dataBlockIterator.next());
+ if (blocksChunkHolder.getDataBlock().getColumnsMaxValue() == null) {
+ return blocksChunkHolder;
+ }
if (blockletScanner.isScanRequired(blocksChunkHolder)) {
return blocksChunkHolder;
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/AbstractDetailQueryResultIterator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/AbstractDetailQueryResultIterator.java b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/AbstractDetailQueryResultIterator.java
index 92e9594..95030d3 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/AbstractDetailQueryResultIterator.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/AbstractDetailQueryResultIterator.java
@@ -32,6 +32,7 @@ import org.apache.carbondata.core.datastore.FileHolder;
import org.apache.carbondata.core.datastore.block.AbstractIndex;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.datastore.impl.btree.BTreeDataRefNodeFinder;
+import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataRefNodeWrapper;
import org.apache.carbondata.core.mutate.DeleteDeltaVo;
import org.apache.carbondata.core.reader.CarbonDeleteFilesDataReader;
import org.apache.carbondata.core.scan.executor.infos.BlockExecutionInfo;
@@ -127,20 +128,27 @@ public abstract class AbstractDetailQueryResultIterator<E> extends CarbonIterato
// set the deleted row to block execution info
blockInfo.setDeletedRecordsMap(deletedRowsMap);
}
- DataRefNode startDataBlock = finder
- .findFirstDataBlock(blockInfo.getDataBlock().getDataRefNode(), blockInfo.getStartKey());
- while (startDataBlock.nodeNumber() < blockInfo.getStartBlockletIndex()) {
- startDataBlock = startDataBlock.getNextDataRefNode();
- }
- long numberOfBlockToScan = blockInfo.getNumberOfBlockletToScan();
- //if number of block is less than 0 then take end block.
- if (numberOfBlockToScan <= 0) {
- DataRefNode endDataBlock = finder
- .findLastDataBlock(blockInfo.getDataBlock().getDataRefNode(), blockInfo.getEndKey());
- numberOfBlockToScan = endDataBlock.nodeNumber() - startDataBlock.nodeNumber() + 1;
+ DataRefNode dataRefNode = blockInfo.getDataBlock().getDataRefNode();
+ if (dataRefNode instanceof BlockletDataRefNodeWrapper) {
+ BlockletDataRefNodeWrapper wrapper = (BlockletDataRefNodeWrapper) dataRefNode;
+ blockInfo.setFirstDataBlock(wrapper);
+ blockInfo.setNumberOfBlockToScan(wrapper.numberOfNodes());
+
+ } else {
+ DataRefNode startDataBlock =
+ finder.findFirstDataBlock(dataRefNode, blockInfo.getStartKey());
+ while (startDataBlock.nodeNumber() < blockInfo.getStartBlockletIndex()) {
+ startDataBlock = startDataBlock.getNextDataRefNode();
+ }
+ long numberOfBlockToScan = blockInfo.getNumberOfBlockletToScan();
+ //if number of block is less than 0 then take end block.
+ if (numberOfBlockToScan <= 0) {
+ DataRefNode endDataBlock = finder.findLastDataBlock(dataRefNode, blockInfo.getEndKey());
+ numberOfBlockToScan = endDataBlock.nodeNumber() - startDataBlock.nodeNumber() + 1;
+ }
+ blockInfo.setFirstDataBlock(startDataBlock);
+ blockInfo.setNumberOfBlockToScan(numberOfBlockToScan);
}
- blockInfo.setFirstDataBlock(startDataBlock);
- blockInfo.setNumberOfBlockToScan(numberOfBlockToScan);
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java b/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java
index 97b1a1f..34c7709 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java
@@ -122,6 +122,57 @@ public abstract class AbstractDataFileFooterConverter {
}
/**
+ * Below method will be used to get the index info from index file
+ *
+ * @param filePath file path of the index file
+ * @return list of index info
+ * @throws IOException problem while reading the index file
+ */
+ public List<DataFileFooter> getIndexInfo(String filePath) throws IOException {
+ CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
+ List<DataFileFooter> dataFileFooters = new ArrayList<DataFileFooter>();
+ String parentPath = filePath.substring(0, filePath.lastIndexOf("/"));
+ try {
+ // open the reader
+ indexReader.openThriftReader(filePath);
+ // get the index header
+ org.apache.carbondata.format.IndexHeader readIndexHeader = indexReader.readIndexHeader();
+ List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
+ List<org.apache.carbondata.format.ColumnSchema> table_columns =
+ readIndexHeader.getTable_columns();
+ for (int i = 0; i < table_columns.size(); i++) {
+ columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
+ }
+ // get the segment info
+ SegmentInfo segmentInfo = getSegmentInfo(readIndexHeader.getSegment_info());
+ BlockletIndex blockletIndex = null;
+ DataFileFooter dataFileFooter = null;
+ // read the block info from file
+ while (indexReader.hasNext()) {
+ BlockIndex readBlockIndexInfo = indexReader.readBlockIndexInfo();
+ blockletIndex = getBlockletIndex(readBlockIndexInfo.getBlock_index());
+ dataFileFooter = new DataFileFooter();
+ TableBlockInfo tableBlockInfo = new TableBlockInfo();
+ tableBlockInfo.setBlockOffset(readBlockIndexInfo.getOffset());
+ tableBlockInfo.setVersion(
+ ColumnarFormatVersion.valueOf((short) readIndexHeader.getVersion()));
+ int blockletSize = getBlockletSize(readBlockIndexInfo);
+ tableBlockInfo.getBlockletInfos().setNoOfBlockLets(blockletSize);
+ tableBlockInfo.setFilePath(parentPath + "/" + readBlockIndexInfo.file_name);
+ dataFileFooter.setBlockletIndex(blockletIndex);
+ dataFileFooter.setColumnInTable(columnSchemaList);
+ dataFileFooter.setNumberOfRows(readBlockIndexInfo.getNum_rows());
+ dataFileFooter.setBlockInfo(new BlockInfo(tableBlockInfo));
+ dataFileFooter.setSegmentInfo(segmentInfo);
+ dataFileFooters.add(dataFileFooter);
+ }
+ } finally {
+ indexReader.closeThriftReader();
+ }
+ return dataFileFooters;
+ }
+
+ /**
* this method returns the number of blocklets in a block
*
* @param readBlockIndexInfo
@@ -148,6 +199,8 @@ public abstract class AbstractDataFileFooterConverter {
public abstract DataFileFooter readDataFileFooter(TableBlockInfo tableBlockInfo)
throws IOException;
+ public abstract List<ColumnSchema> getSchema(TableBlockInfo tableBlockInfo) throws IOException;
+
/**
* Below method will be used to get blocklet index for data file meta
*
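Assuming the concrete DataFileFooterConverter shown later in this patch, reading every footer recorded in a carbonindex file through the new API could look like the sketch below; the getters on DataFileFooter are assumed to mirror the setters used above, and the file path is illustrative:

    AbstractDataFileFooterConverter converter = new DataFileFooterConverter();
    List<DataFileFooter> footers =
        converter.getIndexInfo("/path/to/Segment_0/example.carbonindex");
    for (DataFileFooter footer : footers) {
      // each entry describes one block listed in the index file
      System.out.println(footer.getNumberOfRows() + " rows in "
          + footer.getBlockInfo().getTableBlockInfo().getFilePath());
    }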
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index 1b08263..372b302 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -52,10 +52,13 @@ import org.apache.carbondata.core.datastore.columnar.UnBlockIndexer;
import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.datastore.page.statistics.MeasurePageStatsVO;
+import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
import org.apache.carbondata.core.keygenerator.mdkey.NumberCompressor;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
+import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
import org.apache.carbondata.core.metadata.ValueEncoderMeta;
import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
+import org.apache.carbondata.core.metadata.blocklet.SegmentInfo;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
@@ -918,10 +921,26 @@ public final class CarbonUtil {
* Below method will be used to read the data file metadata
*/
public static DataFileFooter readMetadatFile(TableBlockInfo tableBlockInfo) throws IOException {
- AbstractDataFileFooterConverter fileFooterConverter =
- DataFileFooterConverterFactory.getInstance()
- .getDataFileFooterConverter(tableBlockInfo.getVersion());
- return fileFooterConverter.readDataFileFooter(tableBlockInfo);
+ BlockletDetailInfo detailInfo = tableBlockInfo.getDetailInfo();
+ if (detailInfo == null) {
+ AbstractDataFileFooterConverter fileFooterConverter =
+ DataFileFooterConverterFactory.getInstance()
+ .getDataFileFooterConverter(tableBlockInfo.getVersion());
+ return fileFooterConverter.readDataFileFooter(tableBlockInfo);
+ } else {
+ DataFileFooter fileFooter = new DataFileFooter();
+ fileFooter.setSchemaUpdatedTimeStamp(detailInfo.getSchemaUpdatedTimeStamp());
+ ColumnarFormatVersion version =
+ ColumnarFormatVersion.valueOf(detailInfo.getVersionNumber());
+ AbstractDataFileFooterConverter dataFileFooterConverter =
+ DataFileFooterConverterFactory.getInstance().getDataFileFooterConverter(version);
+ fileFooter.setColumnInTable(dataFileFooterConverter.getSchema(tableBlockInfo));
+ SegmentInfo segmentInfo = new SegmentInfo();
+ segmentInfo.setColumnCardinality(detailInfo.getDimLens());
+ segmentInfo.setNumberOfColumns(detailInfo.getRowCount());
+ fileFooter.setSegmentInfo(segmentInfo);
+ return fileFooter;
+ }
}
/**
@@ -1559,24 +1578,23 @@ public final class CarbonUtil {
}
/**
- * @param tableInfo
* @param invalidBlockVOForSegmentId
* @param updateStatusMngr
* @return
*/
- public static boolean isInvalidTableBlock(TableBlockInfo tableInfo,
+ public static boolean isInvalidTableBlock(String segmentId, String filePath,
UpdateVO invalidBlockVOForSegmentId, SegmentUpdateStatusManager updateStatusMngr) {
- if (!updateStatusMngr.isBlockValid(tableInfo.getSegmentId(),
- CarbonTablePath.getCarbonDataFileName(tableInfo.getFilePath()) + CarbonTablePath
+ if (!updateStatusMngr.isBlockValid(segmentId,
+ CarbonTablePath.getCarbonDataFileName(filePath) + CarbonTablePath
.getCarbonDataExtension())) {
return true;
}
if (null != invalidBlockVOForSegmentId) {
- Long blockTimeStamp = Long.parseLong(tableInfo.getFilePath()
- .substring(tableInfo.getFilePath().lastIndexOf('-') + 1,
- tableInfo.getFilePath().lastIndexOf('.')));
+ Long blockTimeStamp = Long.parseLong(filePath
+ .substring(filePath.lastIndexOf('-') + 1,
+ filePath.lastIndexOf('.')));
if ((blockTimeStamp > invalidBlockVOForSegmentId.getFactTimestamp() && (
invalidBlockVOForSegmentId.getUpdateDeltaStartTimestamp() != null
&& blockTimeStamp < invalidBlockVOForSegmentId.getUpdateDeltaStartTimestamp()))) {
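With the signature change above, the driver can validate a block from just its segment id and file path, without constructing a full TableBlockInfo. The check parses the load timestamp out of the carbondata file name, between the last '-' and the extension; a self-contained sketch with an illustrative file name:

    // the block timestamp is the token between the last '-' and the last '.'
    String filePath = "part-0-0_batchno0-0-1499246239200.carbondata"; // illustrative
    long blockTimeStamp = Long.parseLong(
        filePath.substring(filePath.lastIndexOf('-') + 1, filePath.lastIndexOf('.')));
    // blockTimeStamp == 1499246239200L; it is then compared against the
    // segment's invalid timestamp range exactly as in the code above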
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter.java b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter.java
index 0f82b95..3ac6987 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter.java
@@ -121,4 +121,8 @@ public class DataFileFooterConverter extends AbstractDataFileFooterConverter {
blockletInfo.setNumberOfRows(blockletInfoThrift.getNum_rows());
return blockletInfo;
}
+
+ @Override public List<ColumnSchema> getSchema(TableBlockInfo tableBlockInfo) throws IOException {
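+ // schema cannot be read from the data file for this format version,
+ // so callers are expected to handle the null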
+ return null;
+ }
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter2.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter2.java b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter2.java
index 4882b0f..8cd437f 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter2.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter2.java
@@ -140,4 +140,7 @@ public class DataFileFooterConverter2 extends AbstractDataFileFooterConverter {
return numberOfDimensionColumns;
}
+ @Override public List<ColumnSchema> getSchema(TableBlockInfo tableBlockInfo) throws IOException {
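+ // schema cannot be read from the data file for this format version,
+ // so callers are expected to handle the null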
+ return null;
+ }
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java
index 143c1b1..ccb8b29 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java
@@ -85,6 +85,17 @@ public class DataFileFooterConverterV3 extends AbstractDataFileFooterConverter {
return dataFileFooter;
}
+ @Override public List<ColumnSchema> getSchema(TableBlockInfo tableBlockInfo) throws IOException {
+ CarbonHeaderReader carbonHeaderReader = new CarbonHeaderReader(tableBlockInfo.getFilePath());
+ FileHeader fileHeader = carbonHeaderReader.readHeader();
+ List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
+ List<org.apache.carbondata.format.ColumnSchema> table_columns = fileHeader.getColumn_schema();
+ for (int i = 0; i < table_columns.size(); i++) {
+ columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
+ }
+ return columnSchemaList;
+ }
+
/**
* Below method is to convert the blocklet info of the thrift to wrapper
* blocklet info
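Only the V3 converter can serve getSchema(), since it reads the column schema from the data file header; the V1/V2 converters above return null. A hedged dispatch sketch (getColumnInTable() is the assumed getter for the setter used elsewhere in this patch):

    AbstractDataFileFooterConverter converter = DataFileFooterConverterFactory
        .getInstance().getDataFileFooterConverter(ColumnarFormatVersion.V3);
    List<ColumnSchema> schema = converter.getSchema(tableBlockInfo);
    if (schema == null) {
      // older formats: fall back to reading the full footer
      schema = converter.readDataFileFooter(tableBlockInfo).getColumnInTable();
    }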
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/format/src/main/thrift/carbondata_index.thrift
----------------------------------------------------------------------
diff --git a/format/src/main/thrift/carbondata_index.thrift b/format/src/main/thrift/carbondata_index.thrift
index c055031..4df085a 100644
--- a/format/src/main/thrift/carbondata_index.thrift
+++ b/format/src/main/thrift/carbondata_index.thrift
@@ -41,4 +41,5 @@ struct BlockIndex{
2: required string file_name; // Block file name
3: required i64 offset; // Offset of the footer
4: required carbondata.BlockletIndex block_index; // Blocklet index
+ 5: optional carbondata.BlockletInfo3 blocklet_info;
}
\ No newline at end of file
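Declaring field 5 as optional keeps the index format backward compatible: old readers ignore the field and old index files simply never set it. With standard Thrift Java codegen, consumers presence-check the field before use; indexReader below is the CarbonIndexFileReader used earlier in this patch, and the isSet/get accessor names follow Thrift's generated naming convention (an assumption, not code from this patch):

    BlockIndex blockIndex = indexReader.readBlockIndexInfo();
    if (blockIndex.isSetBlocklet_info()) {
      // blocklet metadata can be taken directly from the index file,
      // avoiding a read of the data file footer
      org.apache.carbondata.format.BlockletInfo3 info = blockIndex.getBlocklet_info();
    }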
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
----------------------------------------------------------------------
diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
index 16b5d69..63d37eb 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
@@ -19,7 +19,14 @@ package org.apache.carbondata.hadoop;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.DataRefNode;
@@ -367,8 +374,9 @@ public class CarbonInputFormat<T> extends FileInputFormat<Void, T> {
if (isIUDTable) {
// In case IUD is not performed in this table avoid searching for
// invalidated blocks.
- if (CarbonUtil.isInvalidTableBlock(tableBlockInfo, invalidBlockVOForSegmentId,
- updateStatusManager)) {
+ if (CarbonUtil
+ .isInvalidTableBlock(tableBlockInfo.getSegmentId(), tableBlockInfo.getFilePath(),
+ invalidBlockVOForSegmentId, updateStatusManager)) {
continue;
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java
----------------------------------------------------------------------
diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java
index 631bc2c..56bade7 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java
@@ -29,6 +29,7 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.block.BlockletInfos;
import org.apache.carbondata.core.datastore.block.Distributable;
import org.apache.carbondata.core.datastore.block.TableBlockInfo;
+import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
import org.apache.carbondata.core.mutate.UpdateVO;
import org.apache.carbondata.core.util.ByteUtil;
@@ -77,6 +78,8 @@ public class CarbonInputSplit extends FileSplit
*/
private String[] deleteDeltaFiles;
+ private BlockletDetailInfo detailInfo;
+
public CarbonInputSplit() {
segmentId = null;
taskId = "0";
@@ -138,10 +141,12 @@ public class CarbonInputSplit extends FileSplit
BlockletInfos blockletInfos =
new BlockletInfos(split.getNumberOfBlocklets(), 0, split.getNumberOfBlocklets());
try {
- tableBlockInfoList.add(
+ TableBlockInfo blockInfo =
new TableBlockInfo(split.getPath().toString(), split.getStart(), split.getSegmentId(),
split.getLocations(), split.getLength(), blockletInfos, split.getVersion(),
- split.getDeleteDeltaFiles()));
+ split.getDeleteDeltaFiles());
+ blockInfo.setDetailInfo(split.getDetailInfo());
+ tableBlockInfoList.add(blockInfo);
} catch (IOException e) {
throw new RuntimeException("fail to get location of split: " + split, e);
}
@@ -153,9 +158,12 @@ public class CarbonInputSplit extends FileSplit
BlockletInfos blockletInfos =
new BlockletInfos(inputSplit.getNumberOfBlocklets(), 0, inputSplit.getNumberOfBlocklets());
try {
- return new TableBlockInfo(inputSplit.getPath().toString(), inputSplit.getStart(),
- inputSplit.getSegmentId(), inputSplit.getLocations(), inputSplit.getLength(),
- blockletInfos, inputSplit.getVersion(), inputSplit.getDeleteDeltaFiles());
+ TableBlockInfo blockInfo =
+ new TableBlockInfo(inputSplit.getPath().toString(), inputSplit.getStart(),
+ inputSplit.getSegmentId(), inputSplit.getLocations(), inputSplit.getLength(),
+ blockletInfos, inputSplit.getVersion(), inputSplit.getDeleteDeltaFiles());
+ blockInfo.setDetailInfo(inputSplit.getDetailInfo());
+ return blockInfo;
} catch (IOException e) {
throw new RuntimeException("fail to get location of split: " + inputSplit, e);
}
@@ -180,6 +188,11 @@ public class CarbonInputSplit extends FileSplit
for (int i = 0; i < numberOfDeleteDeltaFiles; i++) {
deleteDeltaFiles[i] = in.readUTF();
}
+ boolean detailInfoExists = in.readBoolean();
+ if (detailInfoExists) {
+ detailInfo = new BlockletDetailInfo();
+ detailInfo.readFields(in);
+ }
}
@Override public void write(DataOutput out) throws IOException {
@@ -197,6 +210,10 @@ public class CarbonInputSplit extends FileSplit
out.writeUTF(deleteDeltaFiles[i]);
}
}
+ out.writeBoolean(detailInfo != null);
+ if (detailInfo != null) {
+ detailInfo.write(out);
+ }
}
public List<String> getInvalidSegments() {
@@ -310,4 +327,16 @@ public class CarbonInputSplit extends FileSplit
public String[] getDeleteDeltaFiles() {
return deleteDeltaFiles;
}
+
+ public void setDeleteDeltaFiles(String[] deleteDeltaFiles) {
+ this.deleteDeltaFiles = deleteDeltaFiles;
+ }
+
+ public BlockletDetailInfo getDetailInfo() {
+ return detailInfo;
+ }
+
+ public void setDetailInfo(BlockletDetailInfo detailInfo) {
+ this.detailInfo = detailInfo;
+ }
}
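The readFields/write additions above use a presence flag so that a split without detail info still round-trips cleanly. The pattern, reduced to a self-contained Hadoop Writable (Payload is a stand-in for BlockletDetailInfo):

    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    import org.apache.hadoop.io.Writable;

    public class OptionalFieldWritable implements Writable {
      private Payload payload; // may be null

      @Override public void write(DataOutput out) throws IOException {
        // write a boolean marker first, then the payload only when present
        out.writeBoolean(payload != null);
        if (payload != null) {
          payload.write(out);
        }
      }

      @Override public void readFields(DataInput in) throws IOException {
        // mirror the write order: read the marker, then conditionally the payload
        payload = in.readBoolean() ? readPayload(in) : null;
      }

      private static Payload readPayload(DataInput in) throws IOException {
        Payload p = new Payload();
        p.readFields(in);
        return p;
      }

      public static class Payload implements Writable {
        private long rowCount;
        @Override public void write(DataOutput out) throws IOException { out.writeLong(rowCount); }
        @Override public void readFields(DataInput in) throws IOException { rowCount = in.readLong(); }
      }
    }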
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
----------------------------------------------------------------------
diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
index ae9c676..e73c04a 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
@@ -18,152 +18,556 @@
package org.apache.carbondata.hadoop.api;
import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
+import java.util.Map;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier;
+import org.apache.carbondata.core.indexstore.Blocklet;
+import org.apache.carbondata.core.indexstore.DataMapStoreManager;
+import org.apache.carbondata.core.indexstore.DataMapType;
+import org.apache.carbondata.core.indexstore.TableDataMap;
+import org.apache.carbondata.core.keygenerator.KeyGenException;
+import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
+import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
+import org.apache.carbondata.core.metadata.schema.PartitionInfo;
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.mutate.CarbonUpdateUtil;
+import org.apache.carbondata.core.mutate.SegmentUpdateDetails;
+import org.apache.carbondata.core.mutate.UpdateVO;
+import org.apache.carbondata.core.mutate.data.BlockMappingVO;
import org.apache.carbondata.core.scan.expression.Expression;
+import org.apache.carbondata.core.scan.filter.FilterExpressionProcessor;
import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
+import org.apache.carbondata.core.scan.model.CarbonQueryPlan;
+import org.apache.carbondata.core.scan.model.QueryModel;
+import org.apache.carbondata.core.scan.partition.PartitionUtil;
+import org.apache.carbondata.core.scan.partition.Partitioner;
+import org.apache.carbondata.core.stats.QueryStatistic;
+import org.apache.carbondata.core.stats.QueryStatisticsConstants;
+import org.apache.carbondata.core.stats.QueryStatisticsRecorder;
+import org.apache.carbondata.core.statusmanager.SegmentStatusManager;
+import org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager;
+import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.path.CarbonStorePath;
+import org.apache.carbondata.core.util.path.CarbonTablePath;
+import org.apache.carbondata.hadoop.CarbonMultiBlockSplit;
import org.apache.carbondata.hadoop.CarbonProjection;
-import org.apache.carbondata.hadoop.internal.CarbonInputSplit;
-import org.apache.carbondata.hadoop.internal.segment.Segment;
-import org.apache.carbondata.hadoop.internal.segment.SegmentManager;
-import org.apache.carbondata.hadoop.internal.segment.SegmentManagerFactory;
+import org.apache.carbondata.hadoop.CarbonRecordReader;
+import org.apache.carbondata.hadoop.readsupport.CarbonReadSupport;
+import org.apache.carbondata.hadoop.readsupport.impl.DictionaryDecodeReadSupport;
import org.apache.carbondata.hadoop.util.CarbonInputFormatUtil;
import org.apache.carbondata.hadoop.util.ObjectSerializationUtil;
+import org.apache.carbondata.hadoop.util.SchemaReader;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.InvalidPathException;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.security.TokenCache;
+import org.apache.hadoop.util.StringUtils;
/**
* Input format of CarbonData file.
+ *
* @param <T>
*/
public class CarbonTableInputFormat<T> extends FileInputFormat<Void, T> {
+ // comma separated list of input segment numbers
+ public static final String INPUT_SEGMENT_NUMBERS =
+ "mapreduce.input.carboninputformat.segmentnumbers";
+ // comma separated list of input files
+ public static final String INPUT_FILES = "mapreduce.input.carboninputformat.files";
+ private static final Log LOG = LogFactory.getLog(CarbonTableInputFormat.class);
private static final String FILTER_PREDICATE =
"mapreduce.input.carboninputformat.filter.predicate";
+ private static final String COLUMN_PROJECTION = "mapreduce.input.carboninputformat.projection";
+ private static final String CARBON_TABLE = "mapreduce.input.carboninputformat.table";
+ private static final String CARBON_READ_SUPPORT = "mapreduce.input.carboninputformat.readsupport";
- private SegmentManager segmentManager;
+ /**
+ * It is optional; if the user does not set it, the table schema is read from the store
+ *
+ * @param configuration
+ * @param carbonTable
+ * @throws IOException
+ */
+ public static void setCarbonTable(Configuration configuration, CarbonTable carbonTable)
+ throws IOException {
+ if (null != carbonTable) {
+ configuration.set(CARBON_TABLE, ObjectSerializationUtil.convertObjectToString(carbonTable));
+ }
+ }
- public CarbonTableInputFormat() {
- this.segmentManager = SegmentManagerFactory.getGlobalSegmentManager();
+ public static CarbonTable getCarbonTable(Configuration configuration) throws IOException {
+ String carbonTableStr = configuration.get(CARBON_TABLE);
+ if (carbonTableStr == null) {
+ populateCarbonTable(configuration);
+ // read it from schema file in the store
+ carbonTableStr = configuration.get(CARBON_TABLE);
+ return (CarbonTable) ObjectSerializationUtil.convertStringToObject(carbonTableStr);
+ }
+ return (CarbonTable) ObjectSerializationUtil.convertStringToObject(carbonTableStr);
}
- @Override
- public RecordReader<Void, T> createRecordReader(InputSplit split,
- TaskAttemptContext context) throws IOException, InterruptedException {
- switch (((CarbonInputSplit)split).formatType()) {
- case COLUMNAR:
- // TODO: create record reader for columnar format
- break;
- default:
- throw new RuntimeException("Unsupported format type");
+ /**
+ * this method will read the schema from the physical file and populate it into CARBON_TABLE
+ *
+ * @param configuration
+ * @throws IOException
+ */
+ private static void populateCarbonTable(Configuration configuration) throws IOException {
+ String dirs = configuration.get(INPUT_DIR, "");
+ String[] inputPaths = StringUtils.split(dirs);
+ if (inputPaths.length == 0) {
+ throw new InvalidPathException("No input paths specified in job");
}
- return null;
+ AbsoluteTableIdentifier absoluteTableIdentifier =
+ AbsoluteTableIdentifier.fromTablePath(inputPaths[0]);
+ // read the schema file to get the absoluteTableIdentifier having the correct table id
+ // persisted in the schema
+ CarbonTable carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier);
+ setCarbonTable(configuration, carbonTable);
}
- @Override
- public List<InputSplit> getSplits(JobContext job) throws IOException {
+ public static void setTablePath(Configuration configuration, String tablePath)
+ throws IOException {
+ configuration.set(FileInputFormat.INPUT_DIR, tablePath);
+ }
- // work as following steps:
- // get all current valid segment
- // for each segment, get all input split
+ /**
+ * It sets unresolved filter expression.
+ *
+ * @param configuration
+ * @param filterExpression
+ */
+ public static void setFilterPredicates(Configuration configuration, Expression filterExpression) {
+ if (filterExpression == null) {
+ return;
+ }
+ try {
+ String filterString = ObjectSerializationUtil.convertObjectToString(filterExpression);
+ configuration.set(FILTER_PREDICATE, filterString);
+ } catch (Exception e) {
+ throw new RuntimeException("Error while setting filter expression to Job", e);
+ }
+ }
- List<InputSplit> output = new LinkedList<>();
- Expression filter = getFilter(job.getConfiguration());
- Segment[] segments = segmentManager.getAllValidSegments();
- FilterResolverIntf filterResolver = CarbonInputFormatUtil.resolveFilter(filter, null);
- for (Segment segment: segments) {
- List<InputSplit> splits = segment.getSplits(job, filterResolver);
- output.addAll(splits);
+ public static void setColumnProjection(Configuration configuration, CarbonProjection projection) {
+ if (projection == null || projection.isEmpty()) {
+ return;
+ }
+ String[] allColumns = projection.getAllColumns();
+ StringBuilder builder = new StringBuilder();
+ for (String column : allColumns) {
+ builder.append(column).append(",");
}
- return output;
+ String columnString = builder.toString();
+ columnString = columnString.substring(0, columnString.length() - 1);
+ configuration.set(COLUMN_PROJECTION, columnString);
}
- /**
- * set the table path into configuration
- * @param conf configuration of the job
- * @param tablePath table path string
- */
- public void setTablePath(Configuration conf, String tablePath) {
+ public static String getColumnProjection(Configuration configuration) {
+ return configuration.get(COLUMN_PROJECTION);
+ }
+
+ public static void setCarbonReadSupport(Configuration configuration,
+ Class<? extends CarbonReadSupport> readSupportClass) {
+ if (readSupportClass != null) {
+ configuration.set(CARBON_READ_SUPPORT, readSupportClass.getName());
+ }
+ }
+ private static CarbonTablePath getTablePath(AbsoluteTableIdentifier absIdentifier) {
+ return CarbonStorePath.getCarbonTablePath(absIdentifier);
}
/**
- * return the table path in the configuration
- * @param conf configuration of the job
- * @return table path string
+ * Set list of segments to access
*/
- public String getTablePath(Configuration conf) {
- return null;
+ public static void setSegmentsToAccess(Configuration configuration, List<String> validSegments) {
+ configuration.set(INPUT_SEGMENT_NUMBERS, CarbonUtil.getSegmentString(validSegments));
}
/**
- * set projection columns into configuration
- * @param conf configuration of the job
- * @param projection projection
+ * Set list of files to access
*/
- public void setProjection(Configuration conf, CarbonProjection projection) {
+ public static void setFilesToAccess(Configuration configuration, List<String> validFiles) {
+ configuration.set(INPUT_FILES, CarbonUtil.getSegmentString(validFiles));
+ }
+ private static AbsoluteTableIdentifier getAbsoluteTableIdentifier(Configuration configuration)
+ throws IOException {
+ return getCarbonTable(configuration).getAbsoluteTableIdentifier();
}
/**
- * return the projection in the configuration
- * @param conf configuration of the job
- * @return projection
+ * {@inheritDoc}
+ * The configuration FileInputFormat.INPUT_DIR is used to get the table path to read.
+ *
+ * @param job
+ * @return List<InputSplit> list of CarbonInputSplit
+ * @throws IOException
*/
- public CarbonProjection getProjection(Configuration conf) {
- return null;
+ @Override public List<InputSplit> getSplits(JobContext job) throws IOException {
+ AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
+ TableDataMap blockletMap =
+ DataMapStoreManager.getInstance().getDataMap(identifier, "blocklet", DataMapType.BLOCKLET);
+ List<String> invalidSegments = new ArrayList<>();
+ List<UpdateVO> invalidTimestampsList = new ArrayList<>();
+ List<String> validSegments = Arrays.asList(getSegmentsToAccess(job));
+ // get all valid segments and set them into the configuration
+ if (validSegments.size() == 0) {
+ SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
+ SegmentStatusManager.ValidAndInvalidSegmentsInfo segments =
+ segmentStatusManager.getValidAndInvalidSegments();
+ SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(identifier);
+ validSegments = segments.getValidSegments();
+ if (validSegments.size() == 0) {
+ return new ArrayList<>(0);
+ }
+
+ // remove entry in the segment index if there are invalid segments
+ invalidSegments.addAll(segments.getInvalidSegments());
+ for (String invalidSegmentId : invalidSegments) {
+ invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
+ }
+ if (invalidSegments.size() > 0) {
+ List<TableSegmentUniqueIdentifier> invalidSegmentsIds =
+ new ArrayList<>(invalidSegments.size());
+ for (String segId : invalidSegments) {
+ invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(identifier, segId));
+ }
+ blockletMap.clear(invalidSegments);
+ }
+ }
+
+ // process and resolve the expression
+ Expression filter = getFilterPredicates(job.getConfiguration());
+ CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
+ // this will be null in case of corrupt schema file.
+ if (null == carbonTable) {
+ throw new IOException("Missing/Corrupt schema file for table.");
+ }
+
+ CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
+
+ // prune partitions for filter query on partition table
+ BitSet matchedPartitions = null;
+ if (null != filter) {
+ PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getFactTableName());
+ if (null != partitionInfo) {
+ Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
+ matchedPartitions = new FilterExpressionProcessor()
+ .getFilteredPartitions(filter, partitionInfo, partitioner);
+ if (matchedPartitions.cardinality() == 0) {
+ // no partition is required
+ return new ArrayList<InputSplit>();
+ }
+ if (matchedPartitions.cardinality() == partitioner.numPartitions()) {
+ // all partitions are required, no need to prune partitions
+ matchedPartitions = null;
+ }
+ }
+ }
+
+ FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filter, identifier);
+
+ // do block filtering and get split
+ List<InputSplit> splits = getSplits(job, filterInterface, validSegments, matchedPartitions);
+ // pass the invalid segments to the task side in order to remove index entries there
+ if (invalidSegments.size() > 0) {
+ for (InputSplit split : splits) {
+ ((org.apache.carbondata.hadoop.CarbonInputSplit) split).setInvalidSegments(invalidSegments);
+ ((org.apache.carbondata.hadoop.CarbonInputSplit) split)
+ .setInvalidTimestampRange(invalidTimestampsList);
+ }
+ }
+ return splits;
}
/**
- * set filter expression into the configuration
- * @param conf configuration of the job
- * @param filter filter expression
+ * {@inheritDoc}
+ * The configurations FileInputFormat.INPUT_DIR and CarbonInputFormat.INPUT_SEGMENT_NUMBERS
+ * are used to get the table path and the segments to read.
+ *
+ * @return
+ * @throws IOException
*/
- public void setFilter(Configuration conf, Expression filter) {
+ private List<InputSplit> getSplits(JobContext job, FilterResolverIntf filterResolver,
+ List<String> validSegments, BitSet matchedPartitions) throws IOException {
+
+ List<InputSplit> result = new LinkedList<InputSplit>();
+ UpdateVO invalidBlockVOForSegmentId = null;
+ Boolean isIUDTable = false;
+
+ AbsoluteTableIdentifier absoluteTableIdentifier =
+ getCarbonTable(job.getConfiguration()).getAbsoluteTableIdentifier();
+ SegmentUpdateStatusManager updateStatusManager =
+ new SegmentUpdateStatusManager(absoluteTableIdentifier);
+
+ isIUDTable = (updateStatusManager.getUpdateStatusDetails().length != 0);
+
+ // for each segment, fetch blocks matching the filter in the driver BTree
+ List<org.apache.carbondata.hadoop.CarbonInputSplit> dataBlocksOfSegment =
+ getDataBlocksOfSegment(job, absoluteTableIdentifier, filterResolver, matchedPartitions,
+ validSegments);
+ for (org.apache.carbondata.hadoop.CarbonInputSplit inputSplit : dataBlocksOfSegment) {
+
+ // Get the UpdateVO for those tables on which IUD operations are being performed.
+ if (isIUDTable) {
+ invalidBlockVOForSegmentId =
+ updateStatusManager.getInvalidTimestampRange(inputSplit.getSegmentId());
+ }
+ if (isIUDTable) {
+ // skip blocks that have been invalidated by IUD operations on this table
+ if (CarbonUtil
+ .isInvalidTableBlock(inputSplit.getSegmentId(), inputSplit.getPath().toString(),
+ invalidBlockVOForSegmentId, updateStatusManager)) {
+ continue;
+ }
+ }
+ String[] deleteDeltaFilePath = null;
+ try {
+ deleteDeltaFilePath =
+ updateStatusManager.getDeleteDeltaFilePath(inputSplit.getPath().toString());
+ } catch (Exception e) {
+ throw new IOException(e);
+ }
+ inputSplit.setDeleteDeltaFiles(deleteDeltaFilePath);
+ result.add(inputSplit);
+ }
+ return result;
+ }
+
+ protected Expression getFilterPredicates(Configuration configuration) {
try {
- String filterString = ObjectSerializationUtil.convertObjectToString(filter);
- conf.set(FILTER_PREDICATE, filterString);
- } catch (Exception e) {
- throw new RuntimeException("Error while setting filter expression to Job", e);
+ String filterExprString = configuration.get(FILTER_PREDICATE);
+ if (filterExprString == null) {
+ return null;
+ }
+ Object filter = ObjectSerializationUtil.convertStringToObject(filterExprString);
+ return (Expression) filter;
+ } catch (IOException e) {
+ throw new RuntimeException("Error while reading filter expression", e);
}
}
/**
- * return filter expression in the configuration
- * @param conf configuration of the job
- * @return filter expression
+ * get data blocks of the given segments
*/
- public Expression getFilter(Configuration conf) {
- Object filter;
- String filterExprString = conf.get(FILTER_PREDICATE);
- if (filterExprString == null) {
- return null;
+ private List<org.apache.carbondata.hadoop.CarbonInputSplit> getDataBlocksOfSegment(JobContext job,
+ AbsoluteTableIdentifier absoluteTableIdentifier, FilterResolverIntf resolver,
+ BitSet matchedPartitions, List<String> segmentIds) throws IOException {
+
+ QueryStatisticsRecorder recorder = CarbonTimeStatisticsFactory.createDriverRecorder();
+ QueryStatistic statistic = new QueryStatistic();
+
+ // get tokens for all the required FileSystem for table path
+ TokenCache.obtainTokensForNamenodes(job.getCredentials(),
+ new Path[] { new Path(absoluteTableIdentifier.getTablePath()) }, job.getConfiguration());
+
+ TableDataMap blockletMap = DataMapStoreManager.getInstance()
+ .getDataMap(absoluteTableIdentifier, "blocklet", DataMapType.BLOCKLET);
+ List<Blocklet> prunedBlocklets = blockletMap.prune(segmentIds, resolver);
+
+ List<org.apache.carbondata.hadoop.CarbonInputSplit> resultFilterredBlocks = new ArrayList<>();
+ for (Blocklet blocklet : prunedBlocklets) {
+ int taskId = CarbonTablePath.DataFileUtil.getTaskIdFromTaskNo(
+ CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath().toString()));
+
+ // matchedPartitions variable will be null in two cases as follows
+ // 1. the table is not a partition table
+ // 2. the table is a partition table, and all partitions are matched by query
+ // for a partition table, the task id of the carbondata file name is the partition id;
+ // if that partition is not required, the block is skipped here.
+ if (matchedPartitions == null || matchedPartitions.get(taskId)) {
+ resultFilterredBlocks.add(convertToCarbonInputSplit(blocklet));
+ }
}
+ statistic
+ .addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
+ recorder.recordStatisticsForDriver(statistic, job.getConfiguration().get("query.id"));
+ return resultFilterredBlocks;
+ }
+
+ private org.apache.carbondata.hadoop.CarbonInputSplit convertToCarbonInputSplit(Blocklet blocklet)
+ throws IOException {
+ blocklet.updateLocations();
+ org.apache.carbondata.hadoop.CarbonInputSplit split =
+ org.apache.carbondata.hadoop.CarbonInputSplit.from(blocklet.getSegmentId(),
+ new FileSplit(blocklet.getPath(), 0, blocklet.getLength(), blocklet.getLocations()),
+ ColumnarFormatVersion.valueOf((short) blocklet.getDetailInfo().getVersionNumber()));
+ split.setDetailInfo(blocklet.getDetailInfo());
+ return split;
+ }
+
+ @Override public RecordReader<Void, T> createRecordReader(InputSplit inputSplit,
+ TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
+ Configuration configuration = taskAttemptContext.getConfiguration();
+ QueryModel queryModel = getQueryModel(inputSplit, taskAttemptContext);
+ CarbonReadSupport<T> readSupport = getReadSupportClass(configuration);
+ return new CarbonRecordReader<T>(queryModel, readSupport);
+ }
+
+ public QueryModel getQueryModel(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
+ throws IOException {
+ Configuration configuration = taskAttemptContext.getConfiguration();
+ CarbonTable carbonTable = getCarbonTable(configuration);
+ // getting the table absoluteTableIdentifier from the carbonTable
+ // to avoid unnecessary deserialization
+ AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
+
+ // query plan includes projection column
+ String projection = getColumnProjection(configuration);
+ CarbonQueryPlan queryPlan = CarbonInputFormatUtil.createQueryPlan(carbonTable, projection);
+ QueryModel queryModel = QueryModel.createModel(identifier, queryPlan, carbonTable);
+
+ // set the filter to the query model in order to filter blocklet before scan
+ Expression filter = getFilterPredicates(configuration);
+ CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
+ FilterResolverIntf filterIntf = CarbonInputFormatUtil.resolveFilter(filter, identifier);
+ queryModel.setFilterExpressionResolverTree(filterIntf);
+
+ // update the file level index store if there are invalid segment
+ if (inputSplit instanceof CarbonMultiBlockSplit) {
+ CarbonMultiBlockSplit split = (CarbonMultiBlockSplit) inputSplit;
+ List<String> invalidSegments = split.getAllSplits().get(0).getInvalidSegments();
+ if (invalidSegments.size() > 0) {
+ queryModel.setInvalidSegmentIds(invalidSegments);
+ }
+ List<UpdateVO> invalidTimestampRangeList =
+ split.getAllSplits().get(0).getInvalidTimestampRange();
+ if ((null != invalidTimestampRangeList) && (invalidTimestampRangeList.size() > 0)) {
+ queryModel.setInvalidBlockForSegmentId(invalidTimestampRangeList);
+ }
+ }
+ return queryModel;
+ }
+
+ public CarbonReadSupport<T> getReadSupportClass(Configuration configuration) {
+ String readSupportClass = configuration.get(CARBON_READ_SUPPORT);
+ // By default it uses the dictionary decode read support class
+ CarbonReadSupport<T> readSupport = null;
+ if (readSupportClass != null) {
+ try {
+ Class<?> myClass = Class.forName(readSupportClass);
+ Constructor<?> constructor = myClass.getConstructors()[0];
+ Object object = constructor.newInstance();
+ if (object instanceof CarbonReadSupport) {
+ readSupport = (CarbonReadSupport) object;
+ }
+ } catch (ClassNotFoundException ex) {
+ LOG.error("Class " + readSupportClass + "not found", ex);
+ } catch (Exception ex) {
+ LOG.error("Error while creating " + readSupportClass, ex);
+ }
+ } else {
+ readSupport = new DictionaryDecodeReadSupport<>();
+ }
+ return readSupport;
+ }
+
+ @Override protected boolean isSplitable(JobContext context, Path filename) {
try {
- filter = ObjectSerializationUtil.convertStringToObject(filterExprString);
- } catch (IOException e) {
- throw new RuntimeException("Error while reading filter expression", e);
+ // Don't split the file if it is local file system
+ FileSystem fileSystem = filename.getFileSystem(context.getConfiguration());
+ if (fileSystem instanceof LocalFileSystem) {
+ return false;
+ }
+ } catch (Exception e) {
+ return true;
+ }
+ return true;
+ }
+
+ /**
+ * needs to be moved to core
+ *
+ * @return updateExtension
+ */
+ private String getUpdateExtension() {
+ // TODO: needs to change when update is supported; it will mostly be the update timestamp
+ return "update";
+ }
+
+ /**
+ * return valid segment to access
+ */
+ private String[] getSegmentsToAccess(JobContext job) {
+ String segmentString = job.getConfiguration().get(INPUT_SEGMENT_NUMBERS, "");
+ if (segmentString.trim().isEmpty()) {
+ return new String[0];
}
- assert (filter instanceof Expression);
- return (Expression) filter;
+ return segmentString.split(",");
}
/**
- * Optional API. It can be used by query optimizer to select index based on filter
- * in the configuration of the job. After selecting index internally, index' name will be set
- * in the configuration.
+ * Get the row count of each block and the mapping of segment id to block count.
*
- * The process of selection is simple, just use the default index. Subclass can provide a more
- * advanced selection logic like cost based.
- * @param conf job configuration
+ * @param job
+ * @param identifier
+ * @return
+ * @throws IOException
+ * @throws KeyGenException
*/
- public void selectIndex(Configuration conf) {
- // set the default index in configuration
+ public BlockMappingVO getBlockRowCount(JobContext job, AbsoluteTableIdentifier identifier)
+ throws IOException, KeyGenException {
+ TableDataMap blockletMap =
+ DataMapStoreManager.getInstance().getDataMap(identifier, "blocklet", DataMapType.BLOCKLET);
+ SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(identifier);
+ SegmentStatusManager.ValidAndInvalidSegmentsInfo validAndInvalidSegments =
+ new SegmentStatusManager(identifier).getValidAndInvalidSegments();
+ Map<String, Long> blockRowCountMapping =
+ new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
+ Map<String, Long> segmentAndBlockCountMapping =
+ new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
+ List<Blocklet> blocklets = blockletMap.prune(validAndInvalidSegments.getValidSegments(), null);
+ for (Blocklet blocklet : blocklets) {
+ String blockName = blocklet.getPath().toString();
+ blockName = CarbonTablePath.getCarbonDataFileName(blockName);
+ blockName = blockName + CarbonTablePath.getCarbonDataExtension();
+
+ long rowCount = blocklet.getDetailInfo().getRowCount();
+
+ String key = CarbonUpdateUtil.getSegmentBlockNameKey(blocklet.getSegmentId(), blockName);
+
+ // if the block is invalid then don't add the count
+ SegmentUpdateDetails details = updateStatusManager.getDetailsForABlock(key);
+
+ if (null == details || !CarbonUpdateUtil.isBlockInvalid(details.getStatus())) {
+ Long blockCount = blockRowCountMapping.get(key);
+ if (blockCount == null) {
+ blockCount = 0L;
+ Long count = segmentAndBlockCountMapping.get(blocklet.getSegmentId());
+ if (count == null) {
+ count = 0L;
+ }
+ segmentAndBlockCountMapping.put(blocklet.getSegmentId(), count + 1);
+ }
+ blockCount += rowCount;
+ blockRowCountMapping.put(key, blockCount);
+ }
+ }
+ return new BlockMappingVO(blockRowCountMapping, segmentAndBlockCountMapping);
}
}
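Putting the new static configuration API together, driver-side split planning could be wired as in the sketch below; the table path, filterExpression and projection values are illustrative, and only setters added in this patch are used:

    public static List<InputSplit> planSplits(Expression filterExpression,
        CarbonProjection projection) throws IOException {
      Job job = Job.getInstance(new Configuration());
      CarbonTableInputFormat.setTablePath(job.getConfiguration(), "/store/default/my_table");
      CarbonTableInputFormat.setFilterPredicates(job.getConfiguration(), filterExpression);
      CarbonTableInputFormat.setColumnProjection(job.getConfiguration(), projection);
      // getSplits prunes blocks through the blocklet data map and, for
      // partition tables, through the matched-partition bitset
      return new CarbonTableInputFormat<Object[]>().getSplits(job);
    }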
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonInputFormatUtil.java
----------------------------------------------------------------------
diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonInputFormatUtil.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonInputFormatUtil.java
index 8270304..8269757 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonInputFormatUtil.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonInputFormatUtil.java
@@ -34,7 +34,7 @@ import org.apache.carbondata.core.scan.model.CarbonQueryPlan;
import org.apache.carbondata.core.scan.model.QueryDimension;
import org.apache.carbondata.core.scan.model.QueryMeasure;
import org.apache.carbondata.core.scan.model.QueryModel;
-import org.apache.carbondata.hadoop.CarbonInputFormat;
+import org.apache.carbondata.hadoop.api.CarbonTableInputFormat;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
@@ -77,9 +77,10 @@ public class CarbonInputFormatUtil {
return plan;
}
- public static <V> CarbonInputFormat<V> createCarbonInputFormat(AbsoluteTableIdentifier identifier,
+ public static <V> CarbonTableInputFormat<V> createCarbonInputFormat(
+ AbsoluteTableIdentifier identifier,
Job job) throws IOException {
- CarbonInputFormat<V> carbonInputFormat = new CarbonInputFormat<>();
+ CarbonTableInputFormat<V> carbonInputFormat = new CarbonTableInputFormat<>();
FileInputFormat.addInputPath(job, new Path(identifier.getTablePath()));
return carbonInputFormat;
}
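Callers keep using the same factory helper and now receive the new table input format; a minimal sketch (the table path is illustrative):

    Job job = Job.getInstance(new Configuration());
    AbsoluteTableIdentifier identifier =
        AbsoluteTableIdentifier.fromTablePath("/store/default/my_table");
    CarbonTableInputFormat<Object[]> inputFormat =
        CarbonInputFormatUtil.createCarbonInputFormat(identifier, job);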
[29/50] [abbrv] carbondata git commit: [CARBONDATA-1242] Block distribution performance improvement. This closes #1108
Posted by ja...@apache.org.
[CARBONDATA-1242] Block distribution performance improvement. This closes #1108
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/49c64f71
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/49c64f71
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/49c64f71
Branch: refs/heads/datamap
Commit: 49c64f713f9a12a627de0d40ee238fdc3660189e
Parents: 1bd7b3d 1278c41
Author: Venkata Ramana G <ra...@huawei.com>
Authored: Wed Jul 5 10:39:54 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Wed Jul 5 10:39:54 2017 +0530
----------------------------------------------------------------------
docs/ddl-operation-on-carbondata.md | 13 ++-----------
.../apache/carbondata/spark/load/CarbonLoaderUtil.java | 10 ++++++++--
2 files changed, 10 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
[05/50] [abbrv] carbondata git commit: show partition function
Posted by ja...@apache.org.
show partition function
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c3bfc4ad
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c3bfc4ad
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c3bfc4ad
Branch: refs/heads/datamap
Commit: c3bfc4ad87dfc66582b31b54ead2109a8e760bdb
Parents: c2b39b2
Author: mayun <si...@163.com>
Authored: Sun Jun 25 12:12:06 2017 +0800
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Thu Jun 29 13:15:51 2017 +0530
----------------------------------------------------------------------
.../examples/CarbonPartitionExample.scala | 147 +++++++++++++
.../examples/CarbonPartitionExample.scala | 49 ++++-
.../partition/TestShowPartitions.scala | 216 +++++++++++++++++++
.../carbondata/spark/util/CommonUtil.scala | 47 ++++
.../spark/sql/catalyst/CarbonDDLSqlParser.scala | 1 +
.../spark/sql/CarbonCatalystOperators.scala | 9 +-
.../org/apache/spark/sql/CarbonSqlParser.scala | 10 +-
.../execution/command/carbonTableSchema.scala | 28 +++
.../spark/sql/hive/CarbonStrategies.scala | 16 ++
.../sql/execution/command/DDLStrategy.scala | 8 +
.../execution/command/carbonTableSchema.scala | 28 +++
11 files changed, 553 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3bfc4ad/examples/spark/src/main/scala/org/apache/carbondata/examples/CarbonPartitionExample.scala
----------------------------------------------------------------------
diff --git a/examples/spark/src/main/scala/org/apache/carbondata/examples/CarbonPartitionExample.scala b/examples/spark/src/main/scala/org/apache/carbondata/examples/CarbonPartitionExample.scala
new file mode 100644
index 0000000..2f55189
--- /dev/null
+++ b/examples/spark/src/main/scala/org/apache/carbondata/examples/CarbonPartitionExample.scala
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.examples
+
+import org.apache.spark.sql.AnalysisException
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+import org.apache.carbondata.examples.util.ExampleUtils
+
+object CarbonPartitionExample {
+
+ def main(args: Array[String]) {
+ val cc = ExampleUtils.createCarbonContext("CarbonPartitionExample")
+ val testData = ExampleUtils.currentPath + "/src/main/resources/data.csv"
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd")
+
+ // none partition table
+ cc.sql("DROP TABLE IF EXISTS t0")
+ cc.sql("""
+ | CREATE TABLE IF NOT EXISTS t0
+ | (
+ | vin String,
+ | logdate Timestamp,
+ | phonenumber Int,
+ | country String,
+ | area String
+ | )
+ | STORED BY 'carbondata'
+ """.stripMargin)
+
+ // range partition
+ cc.sql("DROP TABLE IF EXISTS t1")
+ cc.sql("""
+ | CREATE TABLE IF NOT EXISTS t1(
+ | vin STRING,
+ | phonenumber INT,
+ | country STRING,
+ | area STRING
+ | )
+ | PARTITIONED BY (logdate TIMESTAMP)
+ | STORED BY 'carbondata'
+ | TBLPROPERTIES('PARTITION_TYPE'='RANGE',
+ | 'RANGE_INFO'='2014/01/01,2015/01/01,2016/01/01')
+ """.stripMargin)
+
+ // hash partition
+ cc.sql("DROP TABLE IF EXISTS t3")
+ cc.sql("""
+ | CREATE TABLE IF NOT EXISTS t3(
+ | logdate Timestamp,
+ | phonenumber Int,
+ | country String,
+ | area String
+ | )
+ | PARTITIONED BY (vin String)
+ | STORED BY 'carbondata'
+ | TBLPROPERTIES('PARTITION_TYPE'='HASH','NUM_PARTITIONS'='5')
+ """.stripMargin)
+
+ // list partition
+ cc.sql("DROP TABLE IF EXISTS t5")
+ cc.sql("""
+ | CREATE TABLE IF NOT EXISTS t5(
+ | vin String,
+ | logdate Timestamp,
+ | phonenumber Int,
+ | area String
+ | )
+ | PARTITIONED BY (country string)
+ | STORED BY 'carbondata'
+ | TBLPROPERTIES('PARTITION_TYPE'='LIST',
+ | 'LIST_INFO'='(China,United States),UK ,japan,(Canada,Russia), South Korea ')
+ """.stripMargin)
+
+ cc.sql(s"DROP TABLE IF EXISTS partitionDB.t9")
+ cc.sql(s"DROP DATABASE IF EXISTS partitionDB")
+ cc.sql(s"CREATE DATABASE partitionDB")
+ cc.sql(s"""
+ | CREATE TABLE IF NOT EXISTS partitionDB.t9(
+ | logdate Timestamp,
+ | phonenumber Int,
+ | country String,
+ | area String
+ | )
+ | PARTITIONED BY (vin String)
+ | STORED BY 'carbondata'
+ | TBLPROPERTIES('PARTITION_TYPE'='HASH','NUM_PARTITIONS'='5')
+ """.stripMargin)
+ // hive partition table
+ cc.sql("DROP TABLE IF EXISTS t7")
+ cc.sql("""
+ | create table t7(id int, name string) partitioned by (city string)
+ | row format delimited fields terminated by ','
+ """.stripMargin)
+ cc.sql("alter table t7 add partition (city = 'Hangzhou')")
+ // hive partition table
+ cc.sql(s"DROP TABLE IF EXISTS hiveDB.t7")
+ cc.sql(s"CREATE DATABASE IF NOT EXISTS hiveDB")
+ cc.sql("""
+ | create table hiveDB.t7(id int, name string) partitioned by (city string)
+ | row format delimited fields terminated by ','
+ """.stripMargin)
+ cc.sql("alter table hiveDB.t7 add partition (city = 'Shanghai')")
+ // show partitions
+ try {
+ cc.sql("SHOW PARTITIONS t0").show()
+ } catch {
+ case ex: AnalysisException => print(ex.getMessage())
+ }
+ cc.sql("SHOW PARTITIONS t1").show()
+ cc.sql("SHOW PARTITIONS t3").show()
+ cc.sql("SHOW PARTITIONS t5").show()
+ cc.sql("SHOW PARTITIONS t7").show()
+ cc.sql("use hiveDB").show()
+ cc.sql("SHOW PARTITIONS t7").show()
+ cc.sql("use default").show()
+ cc.sql("SHOW PARTITIONS partitionDB.t9").show()
+
+ cc.sql("DROP TABLE IF EXISTS t0")
+ cc.sql("DROP TABLE IF EXISTS t1")
+ cc.sql("DROP TABLE IF EXISTS t3")
+ cc.sql("DROP TABLE IF EXISTS t5")
+ cc.sql("DROP TABLE IF EXISTS t7")
+ cc.sql(s"DROP TABLE IF EXISTS hiveDb.t7")
+ cc.sql(s"DROP TABLE IF EXISTS partitionDB.t9")
+ cc.sql(s"DROP DATABASE IF EXISTS partitionDB")
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3bfc4ad/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonPartitionExample.scala
----------------------------------------------------------------------
diff --git a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonPartitionExample.scala b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonPartitionExample.scala
index 8a0479f..4cdde42 100644
--- a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonPartitionExample.scala
+++ b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonPartitionExample.scala
@@ -19,6 +19,8 @@ package org.apache.carbondata.examples
import java.io.File
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException
import org.apache.spark.sql.SparkSession
import org.apache.carbondata.core.constants.CarbonCommonConstants
@@ -50,7 +52,6 @@ object CarbonPartitionExample {
// none partition table
spark.sql("DROP TABLE IF EXISTS t0")
-
spark.sql("""
| CREATE TABLE IF NOT EXISTS t0
| (
@@ -65,7 +66,6 @@ object CarbonPartitionExample {
// range partition
spark.sql("DROP TABLE IF EXISTS t1")
-
spark.sql("""
| CREATE TABLE IF NOT EXISTS t1
| (
@@ -82,7 +82,6 @@ object CarbonPartitionExample {
// hash partition
spark.sql("DROP TABLE IF EXISTS t3")
-
spark.sql("""
| CREATE TABLE IF NOT EXISTS t3
| (
@@ -98,7 +97,6 @@ object CarbonPartitionExample {
// list partition
spark.sql("DROP TABLE IF EXISTS t5")
-
spark.sql("""
| CREATE TABLE IF NOT EXISTS t5
| (
@@ -113,14 +111,57 @@ object CarbonPartitionExample {
| 'LIST_INFO'='(China,United States),UK ,japan,(Canada,Russia), South Korea ')
""".stripMargin)
+ // hive partition table
+ spark.sql("DROP TABLE IF EXISTS t7")
+ spark.sql("""
+ | create table t7(id int, name string) partitioned by (city string)
+ | row format delimited fields terminated by ','
+ """.stripMargin)
+ spark.sql("alter table t7 add partition (city = 'Hangzhou')")
+
+ // not default db partition table
+ try {
+ spark.sql(s"DROP TABLE IF EXISTS partitionDB.t9")
+ } catch {
+ case ex: NoSuchDatabaseException => print(ex.getMessage())
+ }
+ spark.sql(s"DROP DATABASE IF EXISTS partitionDB")
+ spark.sql(s"CREATE DATABASE partitionDB")
+ spark.sql(s"""
+ | CREATE TABLE IF NOT EXISTS partitionDB.t9(
+ | logdate Timestamp,
+ | phonenumber Int,
+ | country String,
+ | area String
+ | )
+ | PARTITIONED BY (vin String)
+ | STORED BY 'carbondata'
+ | TBLPROPERTIES('PARTITION_TYPE'='HASH','NUM_PARTITIONS'='5')
+ """.stripMargin)
+
// show tables
spark.sql("SHOW TABLES").show()
+ // show partitions
+ try {
+ spark.sql("""SHOW PARTITIONS t0""").show()
+ } catch {
+ case ex: AnalysisException => print(ex.getMessage())
+ }
+ spark.sql("""SHOW PARTITIONS t1""").show()
+ spark.sql("""SHOW PARTITIONS t3""").show()
+ spark.sql("""SHOW PARTITIONS t5""").show()
+ spark.sql("""SHOW PARTITIONS t7""").show()
+ spark.sql("""SHOW PARTITIONS partitionDB.t9""").show()
+
// drop table
spark.sql("DROP TABLE IF EXISTS t0")
spark.sql("DROP TABLE IF EXISTS t1")
spark.sql("DROP TABLE IF EXISTS t3")
spark.sql("DROP TABLE IF EXISTS t5")
+ spark.sql("DROP TABLE IF EXISTS t7")
+ spark.sql("DROP TABLE IF EXISTS partitionDB.t9")
+ spark.sql(s"DROP DATABASE IF EXISTS partitionDB")
spark.close()
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3bfc4ad/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestShowPartitions.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestShowPartitions.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestShowPartitions.scala
new file mode 100644
index 0000000..7b53964
--- /dev/null
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestShowPartitions.scala
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.partition
+
+import java.sql.Timestamp
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.common.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+class TestShowPartition extends QueryTest with BeforeAndAfterAll {
+ override def beforeAll = {
+
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
+
+ sql("drop table if exists notPartitionTable")
+ sql("""
+ | CREATE TABLE notPartitionTable
+ | (
+ | vin String,
+ | logdate Timestamp,
+ | phonenumber Int,
+ | country String,
+ | area String
+ | )
+ | STORED BY 'carbondata'
+ """.stripMargin)
+
+ sql("drop table if exists hashTable")
+ sql(
+ """
+ | CREATE TABLE hashTable (empname String, designation String, doj Timestamp,
+ | workgroupcategory int, workgroupcategoryname String, deptno int, deptname String,
+ | projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,
+ | utilization int,salary int)
+ | PARTITIONED BY (empno int)
+ | STORED BY 'org.apache.carbondata.format'
+ | TBLPROPERTIES('PARTITION_TYPE'='HASH','NUM_PARTITIONS'='3')
+ """.stripMargin)
+
+ sql("drop table if exists rangeTable")
+ sql(
+ """
+ | CREATE TABLE rangeTable (empno int, empname String, designation String,
+ | workgroupcategory int, workgroupcategoryname String, deptno int, deptname String,
+ | projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,
+ | utilization int,salary int)
+ | PARTITIONED BY (doj Timestamp)
+ | STORED BY 'org.apache.carbondata.format'
+ | TBLPROPERTIES('PARTITION_TYPE'='RANGE',
+ | 'RANGE_INFO'='01-01-2010, 01-01-2015')
+ """.stripMargin)
+
+ sql("drop table if exists listTable")
+ sql(
+ """
+ | CREATE TABLE listTable (empno int, empname String, designation String, doj Timestamp,
+ | workgroupcategoryname String, deptno int, deptname String,
+ | projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,
+ | utilization int,salary int)
+ | PARTITIONED BY (workgroupcategory int)
+ | STORED BY 'org.apache.carbondata.format'
+ | TBLPROPERTIES('PARTITION_TYPE'='LIST',
+ | 'LIST_INFO'='0, 1, (2, 3)')
+ """.stripMargin)
+
+ sql(s"CREATE DATABASE if not exists partitionDB")
+ sql("drop table if exists partitionDB.hashTable")
+ sql("drop table if exists partitionDB.rangeTable")
+ sql("drop table if exists partitionDB.listTable")
+ sql(
+ """
+ | CREATE TABLE partitionDB.hashTable (empname String, designation String, doj Timestamp,
+ | workgroupcategory int, workgroupcategoryname String, deptno int, deptname String,
+ | projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,
+ | utilization int,salary int)
+ | PARTITIONED BY (empno int)
+ | STORED BY 'org.apache.carbondata.format'
+ | TBLPROPERTIES('PARTITION_TYPE'='HASH','NUM_PARTITIONS'='3')
+ """.stripMargin)
+ sql(
+ """
+ | CREATE TABLE partitionDB.rangeTable (empno int, empname String, designation String,
+ | workgroupcategory int, workgroupcategoryname String, deptno int, deptname String,
+ | projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,
+ | utilization int,salary int)
+ | PARTITIONED BY (doj Timestamp)
+ | STORED BY 'org.apache.carbondata.format'
+ | TBLPROPERTIES('PARTITION_TYPE'='RANGE',
+ | 'RANGE_INFO'='01-01-2010, 01-01-2015')
+ """.stripMargin)
+ sql(
+ """
+ | CREATE TABLE partitionDB.listTable (empno int, empname String, designation String,
+ | doj Timestamp,workgroupcategoryname String, deptno int, deptname String,
+ | projectcode int, projectjoindate Timestamp, projectenddate Timestamp,attendance int,
+ | utilization int,salary int)
+ | PARTITIONED BY (workgroupcategory int)
+ | STORED BY 'org.apache.carbondata.format'
+ | TBLPROPERTIES('PARTITION_TYPE'='LIST',
+ | 'LIST_INFO'='0, 1, (2, 3)')
+ """.stripMargin)
+
+ sql("DROP TABLE IF EXISTS hiveTable")
+ sql("""
+ | create table hiveTable(id int, name string) partitioned by (city string)
+ | row format delimited fields terminated by ','
+ """.stripMargin)
+ sql("alter table hiveTable add partition (city = 'Hangzhou')")
+
+ sql(s"CREATE DATABASE if not exists hiveDB")
+ sql("DROP TABLE IF EXISTS hiveDB.hiveTable")
+ sql("""
+ | create table hiveDB.hiveTable(id int, name string) partitioned by (city string)
+ | row format delimited fields terminated by ','
+ """.stripMargin)
+ sql("alter table hiveDB.hiveTable add partition (city = 'Shanghai')")
+ }
+
+ test("show partition table: exception when show not partition table") {
+ val errorMessage =
+ intercept[AnalysisException] { sql("show partitions notPartitionTable").show() }
+ assert(errorMessage.getMessage.contains(
+ "SHOW PARTITIONS is not allowed on a table that is not partitioned: notpartitiontable"))
+ }
+
+ test("show partition table: hash table") {
+ checkAnswer(sql("show partitions hashTable"), Seq(Row("empno=HASH_NUMBER(3)")))
+ }
+
+ test("show partition table: range partition") {
+ checkAnswer(sql("show partitions rangeTable"), Seq(Row("doj=default"),
+ Row("doj<01-01-2010"), Row("01-01-2010<=doj<01-01-2015")))
+ }
+
+ test("show partition table: list partition") {
+ checkAnswer(sql("show partitions listTable"), Seq(Row("workgroupcategory=default"),
+ Row("workgroupcategory=0"), Row("workgroupcategory=1"), Row("workgroupcategory=2, 3")))
+
+ }
+ test("show partition table: not default db") {
+ // EqualTo
+ checkAnswer(sql("show partitions partitionDB.hashTable"), Seq(Row("empno=HASH_NUMBER(3)")))
+ checkAnswer(sql("show partitions partitionDB.rangeTable"), Seq(Row("doj=default"),
+ Row("doj<01-01-2010"), Row("01-01-2010<=doj<01-01-2015")))
+ checkAnswer(sql("show partitions partitionDB.listTable"), Seq(Row("workgroupcategory=default"),
+ Row("workgroupcategory=0"), Row("workgroupcategory=1"), Row("workgroupcategory=2, 3")))
+
+ }
+
+ test("show partition table: hive partition table") {
+ checkAnswer(sql("show partitions hiveTable"), Seq(Row("city=Hangzhou")))
+ sql("use hiveDB").show()
+ checkAnswer(sql("show partitions hiveTable"), Seq(Row("city=Shanghai")))
+ sql("use default").show()
+ }
+
+ override def afterAll = {
+ sql("drop table if exists notPartitionTable")
+ sql("drop table if exists hashTable")
+ sql("drop table if exists listTable")
+ sql("drop table if exists rangeTable")
+ sql("drop table if exists hiveTable")
+ try {
+ sql("drop table if exists partitionDB.hashTable")
+
+ } catch {
+ case ex: NoSuchDatabaseException => print(ex.getMessage())
+ }
+ try {
+ sql("drop table if exists partitionDB.rangeTable")
+ } catch {
+ case ex: NoSuchDatabaseException => print(ex.getMessage())
+ }
+ try {
+ sql("drop table if exists partitionDB.listTable")
+ } catch {
+ case ex: NoSuchDatabaseException => print(ex.getMessage())
+ }
+ try {
+ sql("drop table if exists hiveDB.hiveTable")
+ } catch {
+ case ex: NoSuchDatabaseException => print(ex.getMessage())
+ }
+ sql("DROP DATABASE if exists partitionDB")
+ sql("DROP DATABASE if exists hiveDB")
+ }
+}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3bfc4ad/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala
index d3b6f8d..ac2e311 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala
@@ -27,12 +27,20 @@ import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.spark.SparkContext
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.execution.command.{ColumnProperty, Field, PartitionerField}
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.RowFactory
+import org.apache.spark.sql.types.MetadataBuilder
+import org.apache.spark.sql.types.StringType
import org.apache.spark.util.FileUtils
import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.metadata.datatype.DataType
+import org.apache.carbondata.core.metadata.schema.partition.PartitionType
+import org.apache.carbondata.core.metadata.schema.PartitionInfo
import org.apache.carbondata.core.statusmanager.SegmentStatusManager
import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataTypeUtil}
import org.apache.carbondata.processing.csvload.CSVInputFormat
@@ -544,4 +552,43 @@ object CommonUtil {
}
}
+ def getPartitionInfo(columnName: String, partitionType: PartitionType,
+ partitionInfo: PartitionInfo): Seq[Row] = {
+ val result = Seq.newBuilder[Row]
+ partitionType match {
+ case PartitionType.RANGE =>
+ result += RowFactory.create(columnName + "=default")
+ val rangeInfo = partitionInfo.getRangeInfo
+ for (index <- 0 until rangeInfo.size()) {
+ if (index == 0) {
+ result += RowFactory.create(columnName + "<" + rangeInfo.get(index))
+ } else {
+ result += RowFactory.create(rangeInfo.get(index - 1) + "<=" +
+ columnName + "<" + rangeInfo.get(index))
+ }
+ }
+ case PartitionType.RANGE_INTERVAL =>
+ result += RowFactory.create(columnName + "=")
+ case PartitionType.LIST =>
+ result += RowFactory.create(columnName + "=default")
+ val listInfo = partitionInfo.getListInfo
+ listInfo.asScala.foreach { f =>
+ result += RowFactory.create(columnName + "=" + f.toArray().mkString(", "))
+ }
+ case PartitionType.HASH =>
+ val hashNumber = partitionInfo.getNumPartitions
+ result += RowFactory.create(columnName + "=HASH_NUMBER(" + hashNumber + ")")
+ case _ =>
+ result += RowFactory.create(columnName + "=")
+ }
+ result.result()
+ }
+
+ def partitionInfoOutput: Seq[Attribute] = Seq(
+ AttributeReference("partition", StringType, nullable = false,
+ new MetadataBuilder().putString("comment", "partitions info").build())()
+ )
}
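To make the RANGE formatting above concrete, here is a minimal standalone
sketch (plain Scala strings instead of Carbon's PartitionInfo and Row types)
that reproduces the row layout for a RANGE table with RANGE_INFO
'01-01-2010, 01-01-2015':

object RangeRowSketch {
  // mirrors the RANGE branch of getPartitionInfo above
  def rangeRows(columnName: String, rangeInfo: Seq[String]): Seq[String] = {
    val bounds = rangeInfo.zipWithIndex.map {
      case (bound, 0) => s"$columnName<$bound"
      case (bound, i) => s"${rangeInfo(i - 1)}<=$columnName<$bound"
    }
    s"$columnName=default" +: bounds
  }

  def main(args: Array[String]): Unit = {
    // prints: doj=default / doj<01-01-2010 / 01-01-2010<=doj<01-01-2015
    rangeRows("doj", Seq("01-01-2010", "01-01-2015")).foreach(println)
  }
}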
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3bfc4ad/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
index 383d308..c565c31 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
@@ -112,6 +112,7 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
protected val PARTITION_COUNT = carbonKeyWord("PARTITION_COUNT")
protected val PARTITIONDATA = carbonKeyWord("PARTITIONDATA")
protected val PARTITIONER = carbonKeyWord("PARTITIONER")
+ protected val PARTITIONS = carbonKeyWord("PARTITIONS")
protected val QUOTECHAR = carbonKeyWord("QUOTECHAR")
protected val RELATION = carbonKeyWord("RELATION")
protected val SCHEMA = carbonKeyWord("SCHEMA")
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3bfc4ad/integration/spark/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala
index c1a0dc2..024c54b 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala
@@ -20,12 +20,13 @@ package org.apache.spark.sql
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.plans.logical.{UnaryNode, _}
+import org.apache.spark.sql.catalyst.plans.logical.{ UnaryNode, _ }
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.optimizer.CarbonDecoderRelation
import org.apache.spark.sql.types._
import org.apache.carbondata.spark.CarbonAliasDecoderRelation
+import org.apache.carbondata.spark.util.CommonUtil
/**
* Top command
@@ -137,6 +138,12 @@ case class DeleteRecords(
override def output: Seq[AttributeReference] = Seq.empty
}
+case class ShowPartitions(
+ table: TableIdentifier) extends LogicalPlan {
+ override def children: Seq[LogicalPlan] = Seq.empty
+ override def output: Seq[Attribute] = CommonUtil.partitionInfoOutput
+}
+
/**
* A logical plan representing insertion into Hive table.
* This plan ignores nullability of ArrayType, MapType, StructType unlike InsertIntoTable
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3bfc4ad/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
index f12e54b..a664104 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
@@ -61,7 +61,8 @@ class CarbonSqlParser() extends CarbonDDLSqlParser {
protected lazy val startCommand: Parser[LogicalPlan] =
createDatabase | dropDatabase | loadManagement | describeTable |
- showLoads | alterTable | updateTable | deleteRecords | useDatabase | createTable
+ showPartitions | showLoads | alterTable | updateTable | deleteRecords | useDatabase |
+ createTable
protected lazy val loadManagement: Parser[LogicalPlan] =
deleteLoadsByID | deleteLoadsByLoadDate | cleanFiles | loadDataNew
@@ -487,6 +488,13 @@ class CarbonSqlParser() extends CarbonDDLSqlParser {
}
UpdateTable(relation, columns, selectStmt, where)
}
+ protected lazy val showPartitions: Parser[LogicalPlan] =
+ (SHOW ~> PARTITIONS ~> table) <~ opt(";") ^^ {
+ case table => ShowPartitions(table.tableIdentifier)
+ }
private def splitQuery(query: String): (String, String) = {
val stack = scala.collection.mutable.Stack[Char]()
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3bfc4ad/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
index ba22c3c..3477abb 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
@@ -73,6 +73,34 @@ object Checker {
}
/**
+ * Command to show the partitions of a carbon table.
+ *
+ * @param tableIdentifier identifier of the table whose partitions are listed
+ */
+private[sql] case class ShowCarbonPartitionsCommand(
+ tableIdentifier: TableIdentifier) extends RunnableCommand {
+ val LOGGER = LogServiceFactory.getLogService(ShowCarbonPartitionsCommand.getClass.getName)
+ override val output = CommonUtil.partitionInfoOutput
+ override def run(sqlContext: SQLContext): Seq[Row] = {
+ val relation = CarbonEnv.get.carbonMetastore
+ .lookupRelation1(tableIdentifier)(sqlContext).
+ asInstanceOf[CarbonRelation]
+ val carbonTable = relation.tableMeta.carbonTable
+ val tableName = carbonTable.getFactTableName
+ val partitionInfo = carbonTable.getPartitionInfo(
+ carbonTable.getAbsoluteTableIdentifier.getCarbonTableIdentifier.getTableName)
+ if (partitionInfo == null) {
+ throw new AnalysisException(
+ s"SHOW PARTITIONS is not allowed on a table that is not partitioned: $tableName")
+ }
+ val partitionType = partitionInfo.getPartitionType
+ val columnName = partitionInfo.getColumnSchemaList.get(0).getColumnName
+ LOGGER.info("partition column name: " + columnName)
+ CommonUtil.getPartitionInfo(columnName, partitionType, partitionInfo)
+ }
+}
+
+/**
* Command for the compaction in alter table command
*
* @param alterTableModel
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3bfc4ad/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
index f0cd33b..aba39f7 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
@@ -316,6 +316,22 @@ class CarbonStrategies(sqlContext: SQLContext) extends QueryPlanner[SparkPlan] {
} else {
ExecutedCommand(HiveNativeCommand(sql)) :: Nil
}
+ case ShowPartitions(t) =>
+ val isCarbonTable = CarbonEnv.get.carbonMetastore
+ .tableExists(t)(sqlContext)
+ if (isCarbonTable) {
+ ExecutedCommand(ShowCarbonPartitionsCommand(t)) :: Nil
+ } else {
+ val tableName = t.table
+ val database = t.database
+ // database is an Option; interpolate its value rather than the Option itself
+ val sql = if (database.isEmpty) {
+ s"show partitions $tableName"
+ } else {
+ s"show partitions ${database.get}.$tableName"
+ }
+ ExecutedCommand(HiveNativeCommand(sql)) :: Nil
+ }
case _ =>
Nil
}
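A minimal standalone sketch of the fallback above, assuming Spark's
TableIdentifier with an Option[String] database (as carried by
ShowPartitions(t)):

object FallbackSqlSketch {
  def fallbackSql(database: Option[String], table: String): String =
    database.map(db => s"show partitions $db.$table")
      .getOrElse(s"show partitions $table")

  def main(args: Array[String]): Unit = {
    println(fallbackSql(None, "t1"))           // show partitions t1
    println(fallbackSql(Some("hiveDB"), "t1")) // show partitions hiveDB.t1
  }
}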
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3bfc4ad/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/DDLStrategy.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/DDLStrategy.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/DDLStrategy.scala
index 7d0215f..6087736 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/DDLStrategy.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/DDLStrategy.scala
@@ -115,6 +115,14 @@ class DDLStrategy(sparkSession: SparkSession) extends SparkStrategy {
sparkSession.sessionState.executePlan(UnresolvedRelation(identifier, None)).analyzed
val resultPlan = sparkSession.sessionState.executePlan(resolvedTable).executedPlan
ExecutedCommandExec(DescribeCommandFormatted(resultPlan, plan.output, identifier)) :: Nil
+ case ShowPartitionsCommand(t, cols) =>
+ val isCarbonTable = CarbonEnv.getInstance(sparkSession).carbonMetastore
+ .tableExists(t)(sparkSession)
+ if (isCarbonTable) {
+ ExecutedCommandExec(ShowCarbonPartitionsCommand(t)) :: Nil
+ } else {
+ ExecutedCommandExec(ShowPartitionsCommand(t, cols)) :: Nil
+ }
case set@SetCommand(kv) =>
ExecutedCommandExec(CarbonSetCommand(set)) :: Nil
case reset@ResetCommand =>
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3bfc4ad/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
index f9f556d..8fe4bd7 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
@@ -73,6 +73,34 @@ object Checker {
}
/**
+ * Command to show the partitions of a carbon table.
+ *
+ * @param tableIdentifier identifier of the table whose partitions are listed
+ */
+private[sql] case class ShowCarbonPartitionsCommand(
+ tableIdentifier: TableIdentifier) extends RunnableCommand {
+ val LOGGER = LogServiceFactory.getLogService(ShowCarbonPartitionsCommand.getClass.getName)
+ override val output = CommonUtil.partitionInfoOutput
+ override def run(sparkSession: SparkSession): Seq[Row] = {
+ val relation = CarbonEnv.getInstance(sparkSession).carbonMetastore
+ .lookupRelation(tableIdentifier)(sparkSession).
+ asInstanceOf[CarbonRelation]
+ val carbonTable = relation.tableMeta.carbonTable
+ val tableName = carbonTable.getFactTableName
+ val partitionInfo = carbonTable.getPartitionInfo(
+ carbonTable.getAbsoluteTableIdentifier.getCarbonTableIdentifier.getTableName)
+ if (partitionInfo == null) {
+ throw new AnalysisException(
+ s"SHOW PARTITIONS is not allowed on a table that is not partitioned: $tableName")
+ }
+ val partitionType = partitionInfo.getPartitionType
+ val columnName = partitionInfo.getColumnSchemaList.get(0).getColumnName
+ LOGGER.info("partition column name: " + columnName)
+ CommonUtil.getPartitionInfo(columnName, partitionType, partitionInfo)
+ }
+}
+
+/**
* Command for the compaction in alter table command
*
* @param alterTableModel
[04/50] [abbrv] carbondata git commit: [CARBONDATA-1240] Added
testcase for bad record with update operation. This closes #1106.
Posted by ja...@apache.org.
[CARBONDATA-1240] Added testcase for bad record with update operation. This closes #1106.
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c2b39b26
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c2b39b26
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c2b39b26
Branch: refs/heads/datamap
Commit: c2b39b26ede78809e68b7acf7308db02f3a26ea7
Parents: 480ebb8 026ceae
Author: Venkata Ramana G <ra...@huawei.com>
Authored: Thu Jun 29 12:50:33 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Thu Jun 29 12:50:33 2017 +0530
----------------------------------------------------------------------
.../src/test/resources/IUD/badrecord.csv | 3 +
...UpdateCarbonTableTestCaseWithBadRecord.scala | 70 ++++++++++++++++++++
2 files changed, 73 insertions(+)
----------------------------------------------------------------------
[45/50] [abbrv] carbondata git commit: [CARBONDATA-1271] Enhanced
Performance for Hive Integration with Carbondata
Posted by ja...@apache.org.
[CARBONDATA-1271] Enhanced Performance for Hive Integration with Carbondata
This closes #1142
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/cbe14197
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/cbe14197
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/cbe14197
Branch: refs/heads/datamap
Commit: cbe141976a53a558b84d6e31baf3ec54a9bc38cc
Parents: 285ce72
Author: Bhavya <bh...@knoldus.com>
Authored: Thu Jul 6 11:53:03 2017 +0530
Committer: chenliang613 <ch...@apache.org>
Committed: Wed Jul 12 17:40:11 2017 +0800
----------------------------------------------------------------------
.../core/stats/QueryStatisticsRecorderImpl.java | 80 +++---
.../carbondata/hadoop/CarbonInputFormat.java | 7 +-
.../carbondata/hive/CarbonArrayInspector.java | 4 -
.../hive/CarbonDictionaryDecodeReadSupport.java | 288 +++++++++++++++++++
.../carbondata/hive/CarbonHiveInputSplit.java | 23 +-
.../carbondata/hive/CarbonHiveRecordReader.java | 67 ++---
.../apache/carbondata/hive/CarbonHiveSerDe.java | 36 +--
.../hive/MapredCarbonInputFormat.java | 129 ++++++---
.../hive/server/HiveEmbeddedServer2.java | 1 +
9 files changed, 477 insertions(+), 158 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/cbe14197/core/src/main/java/org/apache/carbondata/core/stats/QueryStatisticsRecorderImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/stats/QueryStatisticsRecorderImpl.java b/core/src/main/java/org/apache/carbondata/core/stats/QueryStatisticsRecorderImpl.java
index f84a674..ffb7d7f 100644
--- a/core/src/main/java/org/apache/carbondata/core/stats/QueryStatisticsRecorderImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/stats/QueryStatisticsRecorderImpl.java
@@ -101,45 +101,47 @@ public class QueryStatisticsRecorderImpl implements QueryStatisticsRecorder, Ser
long scannedPages = 0;
try {
for (QueryStatistic statistic : queryStatistics) {
- switch (statistic.getMessage()) {
- case QueryStatisticsConstants.LOAD_BLOCKS_EXECUTOR:
- load_blocks_time += statistic.getTimeTaken();
- break;
- case QueryStatisticsConstants.SCAN_BLOCKlET_TIME:
- scan_blocks_time += statistic.getCount();
- break;
- case QueryStatisticsConstants.SCAN_BLOCKS_NUM:
- scan_blocks_num += statistic.getCount();
- break;
- case QueryStatisticsConstants.LOAD_DICTIONARY:
- load_dictionary_time += statistic.getTimeTaken();
- break;
- case QueryStatisticsConstants.RESULT_SIZE:
- result_size += statistic.getCount();
- break;
- case QueryStatisticsConstants.EXECUTOR_PART:
- total_executor_time += statistic.getTimeTaken();
- break;
- case QueryStatisticsConstants.TOTAL_BLOCKLET_NUM:
- total_blocklet = statistic.getCount();
- break;
- case QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM:
- valid_scan_blocklet = statistic.getCount();
- break;
- case QueryStatisticsConstants.VALID_PAGE_SCANNED:
- valid_pages_blocklet = statistic.getCount();
- break;
- case QueryStatisticsConstants.TOTAL_PAGE_SCANNED:
- total_pages = statistic.getCount();
- break;
- case QueryStatisticsConstants.READ_BLOCKlET_TIME:
- readTime = statistic.getCount();
- break;
- case QueryStatisticsConstants.PAGE_SCANNED:
- scannedPages = statistic.getCount();
- break;
- default:
- break;
+ if (statistic.getMessage() != null) {
+ switch (statistic.getMessage()) {
+ case QueryStatisticsConstants.LOAD_BLOCKS_EXECUTOR:
+ load_blocks_time += statistic.getTimeTaken();
+ break;
+ case QueryStatisticsConstants.SCAN_BLOCKlET_TIME:
+ scan_blocks_time += statistic.getCount();
+ break;
+ case QueryStatisticsConstants.SCAN_BLOCKS_NUM:
+ scan_blocks_num += statistic.getCount();
+ break;
+ case QueryStatisticsConstants.LOAD_DICTIONARY:
+ load_dictionary_time += statistic.getTimeTaken();
+ break;
+ case QueryStatisticsConstants.RESULT_SIZE:
+ result_size += statistic.getCount();
+ break;
+ case QueryStatisticsConstants.EXECUTOR_PART:
+ total_executor_time += statistic.getTimeTaken();
+ break;
+ case QueryStatisticsConstants.TOTAL_BLOCKLET_NUM:
+ total_blocklet = statistic.getCount();
+ break;
+ case QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM:
+ valid_scan_blocklet = statistic.getCount();
+ break;
+ case QueryStatisticsConstants.VALID_PAGE_SCANNED:
+ valid_pages_blocklet = statistic.getCount();
+ break;
+ case QueryStatisticsConstants.TOTAL_PAGE_SCANNED:
+ total_pages = statistic.getCount();
+ break;
+ case QueryStatisticsConstants.READ_BLOCKlET_TIME:
+ readTime = statistic.getCount();
+ break;
+ case QueryStatisticsConstants.PAGE_SCANNED:
+ scannedPages = statistic.getCount();
+ break;
+ default:
+ break;
+ }
}
}
String headers =
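The change above only wraps the switch in a null check on
statistic.getMessage(). A small standalone sketch of the same defensive
aggregation, using a hypothetical Stat type rather than the Carbon API:

object NullGuardSketch {
  final case class Stat(message: String, count: Long)

  // only aggregate entries whose message key is present, as the patch does
  def totalFor(key: String, stats: Seq[Stat]): Long =
    stats.filter(s => s.message != null && s.message == key).map(_.count).sum

  def main(args: Array[String]): Unit = {
    val stats = Seq(Stat("result_size", 10), Stat(null, 5), Stat("result_size", 2))
    println(totalFor("result_size", stats)) // prints 12; the null-message entry is skipped
  }
}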
http://git-wip-us.apache.org/repos/asf/carbondata/blob/cbe14197/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
----------------------------------------------------------------------
diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
index 1e69648..16b5d69 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
@@ -444,9 +444,14 @@ public class CarbonInputFormat<T> extends FileInputFormat<Void, T> {
}
}
}
+
+ // For Hive integration, fall back to hive.query.id when query.id is not set,
+ // so that driver statistics are still recorded under a valid id
+ String queryId = job.getConfiguration().get("query.id") != null ?
+ job.getConfiguration().get("query.id") :
+ job.getConfiguration().get("hive.query.id");
statistic
.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
- recorder.recordStatisticsForDriver(statistic, job.getConfiguration().get("query.id"));
+ recorder.recordStatisticsForDriver(statistic, queryId);
return resultFilterredBlocks;
} finally {
// clean up the access count for a segment as soon as its usage is complete so that in
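A minimal sketch of the query-id fallback above, written against a plain
config getter rather than the Hadoop Configuration API:

object QueryIdSketch {
  // prefer query.id, fall back to hive.query.id, as the patch does
  def resolveQueryId(get: String => String): String =
    Option(get("query.id")).getOrElse(get("hive.query.id"))

  def main(args: Array[String]): Unit = {
    val conf = Map("hive.query.id" -> "hive-query-123")
    println(resolveQueryId(k => conf.getOrElse(k, null))) // prints: hive-query-123
  }
}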
http://git-wip-us.apache.org/repos/asf/carbondata/blob/cbe14197/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonArrayInspector.java
----------------------------------------------------------------------
diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonArrayInspector.java b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonArrayInspector.java
index 49e068a..b26c959 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonArrayInspector.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonArrayInspector.java
@@ -18,7 +18,6 @@ package org.apache.carbondata.hive;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -122,9 +121,6 @@ class CarbonArrayInspector implements SettableListObjectInspector {
final Writable[] array = ((ArrayWritable) subObj).get();
final List<Writable> list = Arrays.asList(array);
-
- Collections.addAll(list, array);
-
return list;
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/cbe14197/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonDictionaryDecodeReadSupport.java
----------------------------------------------------------------------
diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonDictionaryDecodeReadSupport.java b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonDictionaryDecodeReadSupport.java
new file mode 100644
index 0000000..bc66d49
--- /dev/null
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonDictionaryDecodeReadSupport.java
@@ -0,0 +1,288 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.hive;
+
+import java.io.IOException;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.carbondata.core.cache.Cache;
+import org.apache.carbondata.core.cache.CacheProvider;
+import org.apache.carbondata.core.cache.CacheType;
+import org.apache.carbondata.core.cache.dictionary.Dictionary;
+import org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier;
+import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.encoder.Encoding;
+import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
+import org.apache.carbondata.core.util.CarbonUtil;
+
+import org.apache.carbondata.hadoop.readsupport.CarbonReadSupport;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
+import org.apache.spark.sql.catalyst.util.GenericArrayData;
+
+/**
+ * This is the class to decode dictionary encoded column data back to its original value.
+ */
+public class CarbonDictionaryDecodeReadSupport<T> implements CarbonReadSupport<T> {
+
+ protected Dictionary[] dictionaries;
+
+ protected DataType[] dataTypes;
+ /**
+ * carbon columns
+ */
+ protected CarbonColumn[] carbonColumns;
+
+ protected Writable[] writableArr;
+
+ /**
+ * This initialization is done inside executor task
+ * for column dictionary involved in decoding.
+ *
+ * @param carbonColumns column list
+ * @param absoluteTableIdentifier table identifier
+ */
+ @Override public void initialize(CarbonColumn[] carbonColumns,
+ AbsoluteTableIdentifier absoluteTableIdentifier) throws IOException {
+ this.carbonColumns = carbonColumns;
+ dictionaries = new Dictionary[carbonColumns.length];
+ dataTypes = new DataType[carbonColumns.length];
+ for (int i = 0; i < carbonColumns.length; i++) {
+ if (carbonColumns[i].hasEncoding(Encoding.DICTIONARY) && !carbonColumns[i]
+ .hasEncoding(Encoding.DIRECT_DICTIONARY) && !carbonColumns[i].isComplex()) {
+ CacheProvider cacheProvider = CacheProvider.getInstance();
+ Cache<DictionaryColumnUniqueIdentifier, Dictionary> forwardDictionaryCache = cacheProvider
+ .createCache(CacheType.FORWARD_DICTIONARY, absoluteTableIdentifier.getStorePath());
+ dataTypes[i] = carbonColumns[i].getDataType();
+ dictionaries[i] = forwardDictionaryCache.get(
+ new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier.getCarbonTableIdentifier(),
+ carbonColumns[i].getColumnIdentifier(), dataTypes[i]));
+ } else {
+ dataTypes[i] = carbonColumns[i].getDataType();
+ }
+ }
+ }
+
+ @Override public T readRow(Object[] data) {
+ assert (data.length == dictionaries.length);
+ writableArr = new Writable[data.length];
+ for (int i = 0; i < dictionaries.length; i++) {
+ if (dictionaries[i] != null) {
+ data[i] = dictionaries[i].getDictionaryValueForKey((int) data[i]);
+ }
+ try {
+ writableArr[i] = createWritableObject(data[i], carbonColumns[i]);
+ } catch (IOException e) {
+ throw new RuntimeException(e.getMessage(), e);
+ }
+ }
+
+ return (T) writableArr;
+ }
+
+ /**
+ * Clear the dictionary cache entry (updating its access count) for each
+ * column involved during decode, so the LRU cache policy can evict
+ * entries once the memory threshold is reached.
+ */
+ @Override public void close() {
+ if (dictionaries == null) {
+ return;
+ }
+ for (int i = 0; i < dictionaries.length; i++) {
+ CarbonUtil.clearDictionaryCache(dictionaries[i]);
+ }
+ }
+
+ /**
+ * Create a Writable from the carbon data value, dispatching on the
+ * column's data type (struct, array or primitive).
+ *
+ * @param obj decoded carbon value
+ * @param carbonColumn column the value belongs to
+ * @return the corresponding Writable
+ * @throws IOException if the data type is not supported
+ */
+ private Writable createWritableObject(Object obj, CarbonColumn carbonColumn) throws IOException {
+ DataType dataType = carbonColumn.getDataType();
+ switch (dataType) {
+ case STRUCT:
+ return createStruct(obj, carbonColumn);
+ case ARRAY:
+ return createArray(obj, carbonColumn);
+ default:
+ return createWritablePrimitive(obj, carbonColumn);
+ }
+ }
+
+ /**
+ * Create Array Data for Array Datatype
+ *
+ * @param obj
+ * @param carbonColumn
+ * @return
+ * @throws IOException
+ */
+ private ArrayWritable createArray(Object obj, CarbonColumn carbonColumn) throws IOException {
+ if (obj instanceof GenericArrayData) {
+ Object[] objArray = ((GenericArrayData) obj).array();
+ List<CarbonDimension> childCarbonDimensions = null;
+ CarbonDimension arrayDimension = null;
+ if (carbonColumn.isDimension() && carbonColumn.getColumnSchema().getNumberOfChild() > 0) {
+ childCarbonDimensions = ((CarbonDimension) carbonColumn).getListOfChildDimensions();
+ arrayDimension = childCarbonDimensions.get(0);
+ }
+ List<Writable> array = new ArrayList<>();
+ if (objArray != null) {
+ for (Object curObj : objArray) {
+ array.add(createWritableObject(curObj, arrayDimension));
+ }
+ }
+ if (array.size() > 0) {
+ ArrayWritable subArray = new ArrayWritable(Writable.class,
+ array.toArray(new Writable[array.size()]));
+ return new ArrayWritable(Writable.class, new Writable[] { subArray });
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Create the Struct data for the Struct Datatype
+ *
+ * @param obj
+ * @param carbonColumn
+ * @return
+ * @throws IOException
+ */
+ private ArrayWritable createStruct(Object obj, CarbonColumn carbonColumn) throws IOException {
+ if (obj instanceof GenericInternalRow) {
+ Object[] objArray = ((GenericInternalRow) obj).values();
+ List<CarbonDimension> childCarbonDimensions = null;
+ if (carbonColumn.isDimension() && carbonColumn.getColumnSchema().getNumberOfChild() > 0) {
+ childCarbonDimensions = ((CarbonDimension) carbonColumn).getListOfChildDimensions();
+ }
+ Writable[] arr = new Writable[objArray.length];
+ for (int i = 0; i < objArray.length; i++) {
+
+ arr[i] = createWritableObject(objArray[i], childCarbonDimensions.get(i));
+ }
+ return new ArrayWritable(Writable.class, arr);
+ }
+ throw new IOException("DataType not supported in Carbondata");
+ }
+
+ /**
+ * This method will create the Writable Objects for primitives.
+ *
+ * @param obj
+ * @param carbonColumn
+ * @return
+ * @throws IOException
+ */
+ private Writable createWritablePrimitive(Object obj, CarbonColumn carbonColumn)
+ throws IOException {
+ DataType dataType = carbonColumn.getDataType();
+ if (obj == null) {
+ return null;
+ }
+ switch (dataType) {
+ case NULL:
+ return null;
+ case DOUBLE:
+ return new DoubleWritable((double) obj);
+ case INT:
+ return new IntWritable((int) obj);
+ case LONG:
+ return new LongWritable((long) obj);
+ case SHORT:
+ return new ShortWritable((Short) obj);
+ case DATE:
+ return new DateWritable(new Date((Integer) obj));
+ case TIMESTAMP:
+ return new TimestampWritable(new Timestamp((long) obj));
+ case STRING:
+ return new Text(obj.toString());
+ case DECIMAL:
+ return new HiveDecimalWritable(
+ HiveDecimal.create(new java.math.BigDecimal(obj.toString())));
+ }
+ throw new IOException("Unknown primitive : " + dataType.getName());
+ }
+
+ /**
+ * Set a value on an existing Writable, for callers that want to reuse
+ * the same Writable[] across rows instead of allocating new objects.
+ *
+ * @param writable writable to mutate
+ * @param obj decoded carbon value
+ * @param carbonColumn column the value belongs to
+ * @throws IOException if the data type is not supported
+ */
+ private void setPrimitive(Writable writable, Object obj, CarbonColumn carbonColumn)
+ throws IOException {
+ DataType dataType = carbonColumn.getDataType();
+ if (obj == null) {
+ // nothing to set; writing to a null DataOutput would throw, so return early
+ return;
+ }
+ switch (dataType) {
+ case DOUBLE:
+ ((DoubleWritable) writable).set((double) obj);
+ break;
+ case INT:
+ ((IntWritable) writable).set((int) obj);
+ break;
+ case LONG:
+ ((LongWritable) writable).set((long) obj);
+ break;
+ case SHORT:
+ ((ShortWritable) writable).set((short) obj);
+ break;
+ case DATE:
+ ((DateWritable) writable).set(new Date((Long) obj));
+ break;
+ case TIMESTAMP:
+ ((TimestampWritable) writable).set(new Timestamp((long) obj));
+ break;
+ case STRING:
+ ((Text) writable).set(obj.toString());
+ break;
+ case DECIMAL:
+ ((HiveDecimalWritable) writable)
+ .set(HiveDecimal.create(new java.math.BigDecimal(obj.toString())));
+ }
+ }
+
+}
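For the primitive branch above, a small standalone sketch of the
value-to-Writable mapping (only a few of the data types, dispatching on the
runtime value instead of Carbon's DataType enum):

import org.apache.hadoop.io.{IntWritable, LongWritable, Text, Writable}

object WritableMappingSketch {
  def toWritable(obj: Any): Writable = obj match {
    case i: Int    => new IntWritable(i)
    case l: Long   => new LongWritable(l)
    case s: String => new Text(s)
    case other     => throw new java.io.IOException("Unknown primitive : " + other)
  }

  def main(args: Array[String]): Unit = {
    println(toWritable(42))       // 42
    println(toWritable("carbon")) // carbon
  }
}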
http://git-wip-us.apache.org/repos/asf/carbondata/blob/cbe14197/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveInputSplit.java
----------------------------------------------------------------------
diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveInputSplit.java b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveInputSplit.java
index bfe4d27..b922295 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveInputSplit.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveInputSplit.java
@@ -113,8 +113,7 @@ public class CarbonHiveInputSplit extends FileSplit
}
public static CarbonHiveInputSplit from(String segmentId, FileSplit split,
- ColumnarFormatVersion version)
- throws IOException {
+ ColumnarFormatVersion version) throws IOException {
return new CarbonHiveInputSplit(segmentId, split.getPath(), split.getStart(), split.getLength(),
split.getLocations(), version);
}
@@ -151,8 +150,7 @@ public class CarbonHiveInputSplit extends FileSplit
return segmentId;
}
- @Override
- public void readFields(DataInput in) throws IOException {
+ @Override public void readFields(DataInput in) throws IOException {
super.readFields(in);
this.segmentId = in.readUTF();
this.version = ColumnarFormatVersion.valueOf(in.readShort());
@@ -162,10 +160,10 @@ public class CarbonHiveInputSplit extends FileSplit
for (int i = 0; i < numInvalidSegment; i++) {
invalidSegments.add(in.readUTF());
}
+ this.numberOfBlocklets = in.readInt();
}
- @Override
- public void write(DataOutput out) throws IOException {
+ @Override public void write(DataOutput out) throws IOException {
super.write(out);
out.writeUTF(segmentId);
out.writeShort(version.number());
@@ -174,6 +172,7 @@ public class CarbonHiveInputSplit extends FileSplit
for (String invalidSegment : invalidSegments) {
out.writeUTF(invalidSegment);
}
+ out.writeInt(numberOfBlocklets);
}
public List<String> getInvalidSegments() {
@@ -213,8 +212,7 @@ public class CarbonHiveInputSplit extends FileSplit
return bucketId;
}
- @Override
- public int compareTo(Distributable o) {
+ @Override public int compareTo(Distributable o) {
if (o == null) {
return -1;
}
@@ -264,18 +262,15 @@ public class CarbonHiveInputSplit extends FileSplit
return 0;
}
- @Override
- public String getBlockPath() {
+ @Override public String getBlockPath() {
return getPath().getName();
}
- @Override
- public List<Long> getMatchedBlocklets() {
+ @Override public List<Long> getMatchedBlocklets() {
return null;
}
- @Override
- public boolean fullScan() {
+ @Override public boolean fullScan() {
return true;
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/cbe14197/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java
----------------------------------------------------------------------
diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java
index e4df02e..2a92185 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveRecordReader.java
@@ -62,6 +62,8 @@ class CarbonHiveRecordReader extends CarbonRecordReader<ArrayWritable>
private ArrayWritable valueObj = null;
private CarbonObjectInspector objInspector;
+ private long recordReaderCounter = 0;
+ private int[] columnIds;
public CarbonHiveRecordReader(QueryModel queryModel, CarbonReadSupport<ArrayWritable> readSupport,
InputSplit inputSplit, JobConf jobConf) throws IOException {
@@ -88,17 +90,12 @@ class CarbonHiveRecordReader extends CarbonRecordReader<ArrayWritable>
} catch (QueryExecutionException e) {
throw new IOException(e.getMessage(), e.getCause());
}
- if (valueObj == null) {
- valueObj =
- new ArrayWritable(Writable.class, new Writable[queryModel.getProjectionColumns().length]);
- }
-
final TypeInfo rowTypeInfo;
final List<String> columnNames;
List<TypeInfo> columnTypes;
// Get column names and sort order
final String colIds = conf.get("hive.io.file.readcolumn.ids");
- final String columnNameProperty = conf.get("hive.io.file.readcolumn.names");
+ final String columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS);
final String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES);
if (columnNameProperty.length() == 0) {
@@ -111,47 +108,39 @@ class CarbonHiveRecordReader extends CarbonRecordReader<ArrayWritable>
} else {
columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
}
+
+ if (valueObj == null) {
+ valueObj = new ArrayWritable(Writable.class, new Writable[columnTypes.size()]);
+ }
+
if (!colIds.equals("")) {
String[] arraySelectedColId = colIds.split(",");
List<TypeInfo> reqColTypes = new ArrayList<TypeInfo>();
-
- for (String anArrayColId : arraySelectedColId) {
- reqColTypes.add(columnTypes.get(Integer.parseInt(anArrayColId)));
+ columnIds = new int[arraySelectedColId.length];
+ int columnId = 0;
+ for (int j = 0; j < arraySelectedColId.length; j++) {
+ columnId = Integer.parseInt(arraySelectedColId[j]);
+ columnIds[j] = columnId;
}
- // Create row related objects
- rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, reqColTypes);
- this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
- } else {
- rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
- this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
}
+
+ rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+ this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
}
@Override public boolean next(Void aVoid, ArrayWritable value) throws IOException {
if (carbonIterator.hasNext()) {
Object obj = readSupport.readRow(carbonIterator.next());
- ArrayWritable tmpValue;
- try {
- tmpValue = createArrayWritable(obj);
- } catch (SerDeException se) {
- throw new IOException(se.getMessage(), se.getCause());
- }
-
- if (value != tmpValue) {
- final Writable[] arrValue = value.get();
- final Writable[] arrCurrent = tmpValue.get();
- if (valueObj != null && arrValue.length == arrCurrent.length) {
- System.arraycopy(arrCurrent, 0, arrValue, 0, arrCurrent.length);
- } else {
- if (arrValue.length != arrCurrent.length) {
- throw new IOException(
- "CarbonHiveInput : size of object differs. Value" + " size : " + arrValue.length
- + ", Current Object size : " + arrCurrent.length);
- } else {
- throw new IOException("CarbonHiveInput can not support RecordReaders that"
- + " don't return same key & value & value is null");
- }
+ recordReaderCounter++;
+ Writable[] objArray = (Writable[]) obj;
+ Writable[] sysArray = new Writable[value.get().length];
+ if (columnIds != null && columnIds.length > 0 && objArray.length == columnIds.length) {
+ for (int i = 0; i < columnIds.length; i++) {
+ sysArray[columnIds[i]] = objArray[i];
}
+ value.set(sysArray);
+ } else {
+ value.set(objArray);
}
return true;
} else {
@@ -159,10 +148,6 @@ class CarbonHiveRecordReader extends CarbonRecordReader<ArrayWritable>
}
}
- private ArrayWritable createArrayWritable(Object obj) throws SerDeException {
- return createStruct(obj, objInspector);
- }
-
@Override public Void createKey() {
return null;
}
@@ -172,7 +157,7 @@ class CarbonHiveRecordReader extends CarbonRecordReader<ArrayWritable>
}
@Override public long getPos() throws IOException {
- return 0;
+ return recordReaderCounter;
}
@Override public float getProgress() throws IOException {
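The next() rewrite above scatters the values read in projection order back
into their table-schema slots via columnIds. A standalone sketch of that
remapping:

object ProjectionRemapSketch {
  // projected(i) goes to position columnIds(i) in the full-width row
  def remap(projected: Array[String], columnIds: Array[Int], width: Int): Array[String] = {
    val row = new Array[String](width)
    for (i <- columnIds.indices) row(columnIds(i)) = projected(i)
    row
  }

  def main(args: Array[String]): Unit = {
    // columns 2 and 0 of a 4-column table were projected
    println(remap(Array("area", "vin"), Array(2, 0), 4).mkString("|"))
    // prints: vin|null|area|null
  }
}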
http://git-wip-us.apache.org/repos/asf/carbondata/blob/cbe14197/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveSerDe.java
----------------------------------------------------------------------
diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveSerDe.java b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveSerDe.java
index f66f3ed..2980ad3 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveSerDe.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/CarbonHiveSerDe.java
@@ -79,11 +79,9 @@ class CarbonHiveSerDe extends AbstractSerDe {
final TypeInfo rowTypeInfo;
final List<String> columnNames;
- final List<String> reqColNames;
final List<TypeInfo> columnTypes;
// Get column names and sort order
assert configuration != null;
- final String colIds = configuration.get("hive.io.file.readcolumn.ids");
final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
@@ -98,29 +96,17 @@ class CarbonHiveSerDe extends AbstractSerDe {
} else {
columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
}
- if (colIds != null && !colIds.equals("")) {
- reqColNames = new ArrayList<String>();
-
- String[] arraySelectedColId = colIds.split(",");
- List<TypeInfo> reqColTypes = new ArrayList<TypeInfo>();
- for (String anArrayColId : arraySelectedColId) {
- reqColNames.add(columnNames.get(Integer.parseInt(anArrayColId)));
- reqColTypes.add(columnTypes.get(Integer.parseInt(anArrayColId)));
- }
- // Create row related objects
- rowTypeInfo = TypeInfoFactory.getStructTypeInfo(reqColNames, reqColTypes);
- this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
- }
- else {
- // Create row related objects
- rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
- this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
-
- // Stats part
- serializedSize = 0;
- deserializedSize = 0;
- status = LAST_OPERATION.UNKNOWN;
- }
+
+ // Create row related objects
+ rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+ this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
+
+ // Stats part
+ serializedSize = 0;
+ deserializedSize = 0;
+ status = LAST_OPERATION.UNKNOWN;
}
@Override public Class<? extends Writable> getSerializedClass() {
http://git-wip-us.apache.org/repos/asf/carbondata/blob/cbe14197/integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonInputFormat.java
----------------------------------------------------------------------
diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonInputFormat.java b/integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonInputFormat.java
index 7a1c9db..58f25c9 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonInputFormat.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/MapredCarbonInputFormat.java
@@ -17,12 +17,12 @@
package org.apache.carbondata.hive;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
-import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
-import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
+import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
import org.apache.carbondata.core.scan.expression.Expression;
import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
import org.apache.carbondata.core.scan.model.CarbonQueryPlan;
@@ -31,8 +31,11 @@ import org.apache.carbondata.hadoop.CarbonInputFormat;
import org.apache.carbondata.hadoop.CarbonInputSplit;
import org.apache.carbondata.hadoop.readsupport.CarbonReadSupport;
import org.apache.carbondata.hadoop.util.CarbonInputFormatUtil;
+import org.apache.carbondata.hadoop.util.ObjectSerializationUtil;
+import org.apache.carbondata.hadoop.util.SchemaReader;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.InvalidPathException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
import org.apache.hadoop.io.ArrayWritable;
@@ -42,9 +45,11 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.StringUtils;
public class MapredCarbonInputFormat extends CarbonInputFormat<ArrayWritable>
implements InputFormat<Void, ArrayWritable>, CombineHiveInputFormat.AvoidSplitCombination {
+ private static final String CARBON_TABLE = "mapreduce.input.carboninputformat.table";
@Override public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
org.apache.hadoop.mapreduce.JobContext jobContext = Job.getInstance(jobConf);
@@ -63,47 +68,64 @@ public class MapredCarbonInputFormat extends CarbonInputFormat<ArrayWritable>
@Override
public RecordReader<Void, ArrayWritable> getRecordReader(InputSplit inputSplit, JobConf jobConf,
Reporter reporter) throws IOException {
- QueryModel queryModel = getQueryModel(jobConf);
- CarbonReadSupport<ArrayWritable> readSupport = getReadSupportClass(jobConf);
+ String path = null;
+ if (inputSplit instanceof CarbonHiveInputSplit) {
+ path = ((CarbonHiveInputSplit) inputSplit).getPath().toString();
+ }
+ QueryModel queryModel = getQueryModel(jobConf, path);
+ CarbonReadSupport<ArrayWritable> readSupport = new CarbonDictionaryDecodeReadSupport<>();
return new CarbonHiveRecordReader(queryModel, readSupport, inputSplit, jobConf);
}
- private QueryModel getQueryModel(Configuration configuration) throws IOException {
- CarbonTable carbonTable = getCarbonTable(configuration);
+ /**
+ * this method will read the schema from the physical file and populate into CARBON_TABLE
+ *
+ * @param configuration
+ * @throws IOException
+ */
+ private static void populateCarbonTable(Configuration configuration, String paths)
+ throws IOException {
+ String dirs = configuration.get(INPUT_DIR, "");
+ String[] inputPaths = StringUtils.split(dirs);
+ String validInputPath = null;
+ if (inputPaths.length == 0) {
+ throw new InvalidPathException("No input paths specified in job");
+ } else {
+ if (paths != null) {
+ for (String inputPath : inputPaths) {
+ if (paths.startsWith(inputPath)) {
+ validInputPath = inputPath;
+ break;
+ }
+ }
+ }
+ }
+ if (validInputPath == null) {
+ // fail fast instead of passing a null path to fromTablePath below
+ throw new InvalidPathException("No valid input path found for " + paths);
+ }
+ AbsoluteTableIdentifier absoluteTableIdentifier =
+ AbsoluteTableIdentifier.fromTablePath(validInputPath);
+ // read the schema file to get the absoluteTableIdentifier having the correct table id
+ // persisted in the schema
+ CarbonTable carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier);
+ setCarbonTable(configuration, carbonTable);
+ }
+
+ private static CarbonTable getCarbonTable(Configuration configuration, String path)
+ throws IOException {
+ populateCarbonTable(configuration, path);
+ // read it from schema file in the store
+ String carbonTableStr = configuration.get(CARBON_TABLE);
+ return (CarbonTable) ObjectSerializationUtil.convertStringToObject(carbonTableStr);
+ }
+
+ private QueryModel getQueryModel(Configuration configuration, String path) throws IOException {
+ CarbonTable carbonTable = getCarbonTable(configuration, path);
// getting the table absoluteTableIdentifier from the carbonTable
// to avoid unnecessary deserialization
StringBuilder colNames = new StringBuilder();
AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
- // query plan includes projection column
- String projection = getColumnProjection(configuration);
- if (projection == null) {
- projection = configuration.get("hive.io.file.readcolumn.names");
- }
- if (projection.equals("")) {
- List<CarbonDimension> carbonDimensionList = carbonTable.getAllDimensions();
- List<CarbonMeasure> carbonMeasureList = carbonTable.getAllMeasures();
-
- for (CarbonDimension aCarbonDimensionList : carbonDimensionList) {
- colNames = new StringBuilder((colNames + (aCarbonDimensionList.getColName())) + ",");
- }
- if (carbonMeasureList.size() < 1) {
- colNames = new StringBuilder(colNames.substring(0, colNames.lastIndexOf(",")));
- }
- for (int index = 0; index < carbonMeasureList.size(); index++) {
- if (!carbonMeasureList.get(index).getColName().equals("default_dummy_measure")) {
- if (index == carbonMeasureList.size() - 1) {
- colNames.append(carbonMeasureList.get(index).getColName());
- } else {
- colNames =
- new StringBuilder((colNames + (carbonMeasureList.get(index).getColName())) + ",");
- }
- }
- }
- projection = colNames.toString().trim();
- configuration.set("hive.io.file.readcolumn.names", colNames.toString());
- }
+ String projection = getProjection(configuration, carbonTable,
+ identifier.getCarbonTableIdentifier().getTableName());
CarbonQueryPlan queryPlan = CarbonInputFormatUtil.createQueryPlan(carbonTable, projection);
QueryModel queryModel = QueryModel.createModel(identifier, queryPlan, carbonTable);
// set the filter to the query model in order to filter blocklet before scan
@@ -115,6 +137,45 @@ public class MapredCarbonInputFormat extends CarbonInputFormat<ArrayWritable>
return queryModel;
}
+ /**
+ * Return the Projection for the CarbonQuery.
+ *
+ * @param configuration
+ * @param carbonTable
+ * @param tableName
+ * @return
+ */
+ private String getProjection(Configuration configuration, CarbonTable carbonTable,
+ String tableName) {
+ // query plan includes projection column
+ String projection = getColumnProjection(configuration);
+ if (projection == null) {
+ projection = configuration.get("hive.io.file.readcolumn.names");
+ }
+ List<CarbonColumn> carbonColumns = carbonTable.getCreateOrderColumn(tableName);
+ List<String> carbonColumnNames = new ArrayList<>();
+ StringBuilder allColumns = new StringBuilder();
+ StringBuilder projectionColumns = new StringBuilder();
+ for (CarbonColumn column : carbonColumns) {
+ carbonColumnNames.add(column.getColName());
+ allColumns.append(column.getColName() + ",");
+ }
+
+ if (!projection.equals("")) {
+ String[] columnNames = projection.split(",");
+ //verify that the columns parsed by Hive exist in the table
+ for (String col : columnNames) {
+ // the show columns command also returns these columns
+ if (carbonColumnNames.contains(col)) {
+ projectionColumns.append(col + ",");
+ }
+ }
+ return projectionColumns.substring(0, projectionColumns.lastIndexOf(","));
+ } else {
+ return allColumns.substring(0, allColumns.lastIndexOf(","));
+ }
+ }
+
@Override public boolean shouldSkipCombine(Path path, Configuration conf) throws IOException {
return true;
}
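
For reference, the projection handling introduced above boils down to: take the configured projection (or Hive's read-column list), keep only the names that exist in the table, and fall back to all columns when the request is empty. A standalone sketch of that logic (class and method names here are illustrative, not part of CarbonData):

    import java.util.Arrays;
    import java.util.List;

    // Illustrative sketch of getProjection(): keep only requested columns
    // that exist in the table, fall back to all columns when empty.
    public class ProjectionSketch {
      static String resolveProjection(String requested, List<String> tableColumns) {
        if (requested == null || requested.isEmpty()) {
          return String.join(",", tableColumns);   // no projection: take everything
        }
        StringBuilder result = new StringBuilder();
        for (String col : requested.split(",")) {
          if (tableColumns.contains(col)) {        // drop names unknown to the table
            if (result.length() > 0) result.append(',');
            result.append(col);
          }
        }
        return result.toString();
      }

      public static void main(String[] args) {
        List<String> cols = Arrays.asList("id", "value");
        System.out.println(resolveProjection("id,bogus,value", cols)); // id,value
        System.out.println(resolveProjection("", cols));               // id,value
      }
    }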
http://git-wip-us.apache.org/repos/asf/carbondata/blob/cbe14197/integration/hive/src/main/java/org/apache/carbondata/hive/server/HiveEmbeddedServer2.java
----------------------------------------------------------------------
diff --git a/integration/hive/src/main/java/org/apache/carbondata/hive/server/HiveEmbeddedServer2.java b/integration/hive/src/main/java/org/apache/carbondata/hive/server/HiveEmbeddedServer2.java
index d8705f8..ae931fb 100644
--- a/integration/hive/src/main/java/org/apache/carbondata/hive/server/HiveEmbeddedServer2.java
+++ b/integration/hive/src/main/java/org/apache/carbondata/hive/server/HiveEmbeddedServer2.java
@@ -130,6 +130,7 @@ public class HiveEmbeddedServer2 {
conf.set("hive.added.files.path", "");
conf.set("hive.added.archives.path", "");
conf.set("fs.default.name", "file:///");
+ conf.set(HiveConf.ConfVars.SUBMITLOCALTASKVIACHILD.varname, "false");
// clear mapred.job.tracker - Hadoop defaults to 'local' if not defined. Hive however expects
// this to be set to 'local' - if it's not, it does a remote execution (i.e. no child JVM)
[11/50] [abbrv] carbondata git commit: fix unsafe column page bug
Posted by ja...@apache.org.
fix unsafe column page bug
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/fdb672ad
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/fdb672ad
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/fdb672ad
Branch: refs/heads/datamap
Commit: fdb672ad946c0fe5b9982aee9b09717db36a54f7
Parents: ad80006
Author: jackylk <ja...@huawei.com>
Authored: Fri Jun 30 18:27:08 2017 +0800
Committer: QiangCai <qi...@qq.com>
Committed: Sat Jul 1 13:09:24 2017 +0800
----------------------------------------------------------------------
.../page/UnsafeVarLengthColumnPage.java | 35 ++++++++++++++++----
.../datastore/page/VarLengthColumnPageBase.java | 3 +-
.../resources/big_decimal_without_header.csv | 5 +++
.../TestLoadDataWithHiveSyntaxUnsafe.scala | 25 +++++++++++++-
4 files changed, 59 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/fdb672ad/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeVarLengthColumnPage.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeVarLengthColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeVarLengthColumnPage.java
index 75b5312..dd6abc5 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeVarLengthColumnPage.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeVarLengthColumnPage.java
@@ -47,6 +47,11 @@ public class UnsafeVarLengthColumnPage extends VarLengthColumnPageBase {
private static final double FACTOR = 1.25;
+ /**
+ * create a page
+ * @param dataType data type
+ * @param pageSize number of rows
+ */
UnsafeVarLengthColumnPage(DataType dataType, int pageSize) throws MemoryException {
super(dataType, pageSize);
capacity = (int) (pageSize * DEFAULT_ROW_SIZE * FACTOR);
@@ -55,6 +60,20 @@ public class UnsafeVarLengthColumnPage extends VarLengthColumnPageBase {
baseOffset = memoryBlock.getBaseOffset();
}
+ /**
+ * create a page with initial capacity
+ * @param dataType data type
+ * @param pageSize number of rows
+ * @param capacity initial capacity of the page, in bytes
+ */
+ UnsafeVarLengthColumnPage(DataType dataType, int pageSize, int capacity) throws MemoryException {
+ super(dataType, pageSize);
+ this.capacity = capacity;
+ memoryBlock = UnsafeMemoryManager.allocateMemoryWithRetry((long)(capacity));
+ baseAddress = memoryBlock.getBaseObject();
+ baseOffset = memoryBlock.getBaseOffset();
+ }
+
@Override
public void freeMemory() {
if (memoryBlock != null) {
@@ -65,6 +84,9 @@ public class UnsafeVarLengthColumnPage extends VarLengthColumnPageBase {
}
}
+ /**
+ * reallocate memory if current size plus the requested size exceeds the capacity
+ */
private void ensureMemory(int requestSize) throws MemoryException {
if (totalLength + requestSize > capacity) {
int newSize = 2 * capacity;
@@ -81,17 +103,16 @@ public class UnsafeVarLengthColumnPage extends VarLengthColumnPageBase {
@Override
public void putBytesAtRow(int rowId, byte[] bytes) {
- try {
- ensureMemory(bytes.length);
- } catch (MemoryException e) {
- throw new RuntimeException(e);
- }
- CarbonUnsafe.unsafe.copyMemory(bytes, CarbonUnsafe.BYTE_ARRAY_OFFSET,
- baseAddress, baseOffset + rowOffset[rowId], bytes.length);
+ putBytes(rowId, bytes, 0, bytes.length);
}
@Override
public void putBytes(int rowId, byte[] bytes, int offset, int length) {
+ try {
+ ensureMemory(length);
+ } catch (MemoryException e) {
+ throw new RuntimeException(e);
+ }
CarbonUnsafe.unsafe.copyMemory(bytes, CarbonUnsafe.BYTE_ARRAY_OFFSET + offset,
baseAddress, baseOffset + rowOffset[rowId], length);
}
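
The fix above routes both write paths through putBytes() and moves the ensureMemory() check there, so the backing block grows before every write. The grow-on-demand pattern, sketched on-heap with illustrative names:

    import java.util.Arrays;

    // Grow-on-demand buffer: double the capacity (at least to the required
    // size) before each write, mirroring ensureMemory() in the fix above.
    public class GrowableBuffer {
      private byte[] data = new byte[16];
      private int totalLength = 0;

      private void ensureCapacity(int requestSize) {
        if (totalLength + requestSize > data.length) {
          int newSize = Math.max(2 * data.length, totalLength + requestSize);
          data = Arrays.copyOf(data, newSize);  // copy old content to the new block
        }
      }

      public void putBytes(byte[] bytes, int offset, int length) {
        ensureCapacity(length);                 // grow before every write path
        System.arraycopy(bytes, offset, data, totalLength, length);
        totalLength += length;
      }

      public void putBytes(byte[] bytes) {
        putBytes(bytes, 0, bytes.length);       // single entry point, as in the fix
      }
    }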
http://git-wip-us.apache.org/repos/asf/carbondata/blob/fdb672ad/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java
index a897d54..801cfb3 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java
@@ -105,8 +105,9 @@ public abstract class VarLengthColumnPageBase extends ColumnPage {
int numRows = rowId;
VarLengthColumnPageBase page;
+ int inputDataLength = offset;
if (unsafe) {
- page = new UnsafeVarLengthColumnPage(DECIMAL, numRows);
+ page = new UnsafeVarLengthColumnPage(DECIMAL, numRows, inputDataLength);
} else {
page = new SafeVarLengthColumnPage(DECIMAL, numRows);
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/fdb672ad/integration/spark-common-test/src/test/resources/big_decimal_without_header.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/big_decimal_without_header.csv b/integration/spark-common-test/src/test/resources/big_decimal_without_header.csv
new file mode 100644
index 0000000..4e99384
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/big_decimal_without_header.csv
@@ -0,0 +1,5 @@
+1,32473289848372638424.8218378712
+2,99487323423232324232.2434323233
+3,12773443434389239382.4309238238
+4,38488747823423323726.3589238237
+5,93838663748166353423.4273832762
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/carbondata/blob/fdb672ad/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxUnsafe.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxUnsafe.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxUnsafe.scala
index 2a9d1d9..c713865 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxUnsafe.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadDataWithHiveSyntaxUnsafe.scala
@@ -65,6 +65,8 @@ class TestLoadDataWithHiveSyntaxUnsafe extends QueryTest with BeforeAndAfterAll
sql("drop table if exists comment_test")
sql("drop table if exists smallinttable")
sql("drop table if exists smallinthivetable")
+ sql("drop table if exists decimal_varlength")
+ sql("drop table if exists decimal_varlength_hive")
sql(
"CREATE table carbontable (empno int, empname String, designation String, doj String, " +
"workgroupcategory int, workgroupcategoryname String, deptno int, deptname String, " +
@@ -77,7 +79,18 @@ class TestLoadDataWithHiveSyntaxUnsafe extends QueryTest with BeforeAndAfterAll
"projectcode int, projectjoindate String,projectenddate String, attendance String," +
"utilization String,salary String)row format delimited fields terminated by ','"
)
-
+ sql(
+ """
+ | CREATE TABLE decimal_varlength(id string, value decimal(30,10))
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin
+ )
+ sql(
+ """
+ | CREATE TABLE decimal_varlength_hive(id string, value decimal(30,10))
+ | ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+ """.stripMargin
+ )
}
test("create table with smallint type and query smallint table") {
@@ -674,6 +687,14 @@ class TestLoadDataWithHiveSyntaxUnsafe extends QueryTest with BeforeAndAfterAll
Row("~carbon,")))
}
+ test("test decimal var lenght comlumn page") {
+ sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/big_decimal_without_header.csv' INTO TABLE decimal_varlength" +
+ s" OPTIONS('FILEHEADER'='id,value')")
+ sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/big_decimal_without_header.csv' INTO TABLE decimal_varlength_hive")
+ checkAnswer(sql("select value from decimal_varlength"), sql("select value from decimal_varlength_hive"))
+ checkAnswer(sql("select sum(value) from decimal_varlength"), sql("select sum(value) from decimal_varlength_hive"))
+ }
+
override def afterAll {
sql("drop table if exists escapechar1")
sql("drop table if exists escapechar2")
@@ -701,6 +722,8 @@ class TestLoadDataWithHiveSyntaxUnsafe extends QueryTest with BeforeAndAfterAll
sql("drop table if exists carbontable1")
sql("drop table if exists hivetable1")
sql("drop table if exists comment_test")
+ sql("drop table if exists decimal_varlength")
+ sql("drop table if exists decimal_varlength_hive")
CarbonProperties.getInstance().addProperty(
CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE_LOADING,
CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE_LOADING_DEFAULT
[27/50] [abbrv] carbondata git commit: [CARBONDATA-1255] updated
ddl-operation-on-carbondata.md for column_group feature
Posted by ja...@apache.org.
[CARBONDATA-1255] updated ddl-operation-on-carbondata.md for column_group feature
This closes #1127
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/427b88b1
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/427b88b1
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/427b88b1
Branch: refs/heads/datamap
Commit: 427b88b1e48c8ca2ec1eb382b5617ee81040b437
Parents: 1bd7b3d
Author: vandana <va...@gmail.com>
Authored: Mon Jul 3 12:00:09 2017 +0530
Committer: chenliang613 <ch...@apache.org>
Committed: Wed Jul 5 11:49:14 2017 +0800
----------------------------------------------------------------------
docs/ddl-operation-on-carbondata.md | 13 ++-----------
1 file changed, 2 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/427b88b1/docs/ddl-operation-on-carbondata.md
----------------------------------------------------------------------
diff --git a/docs/ddl-operation-on-carbondata.md b/docs/ddl-operation-on-carbondata.md
index 66c9d30..79d1139 100644
--- a/docs/ddl-operation-on-carbondata.md
+++ b/docs/ddl-operation-on-carbondata.md
@@ -71,14 +71,7 @@ The following DDL operations are supported in CarbonData :
Here, DICTIONARY_EXCLUDE will exclude dictionary creation. This is applicable for high-cardinality columns and is an optional parameter. DICTIONARY_INCLUDE will generate dictionary for the columns specified in the list.
- - **Row/Column Format Configuration**
- Column groups with more than one column are stored in row format, instead of columnar format. By default, each column is a separate column group.
-
-```
- TBLPROPERTIES ('COLUMN_GROUPS'='(column1, column2),
- (Column3,Column4,Column5)')
-```
- **Table Block Size Configuration**
@@ -120,8 +113,7 @@ The following DDL operations are supported in CarbonData :
saleQuantity Int,
revenue Int)
STORED BY 'carbondata'
- TBLPROPERTIES ('COLUMN_GROUPS'='(productNumber,productName)',
- 'DICTIONARY_EXCLUDE'='storeCity',
+ TBLPROPERTIES ('DICTIONARY_EXCLUDE'='storeCity',
'DICTIONARY_INCLUDE'='productNumber',
'NO_INVERTED_INDEX'='productBatch')
```
@@ -402,8 +394,7 @@ of columns is used.
productBatch String,
revenue Int)
STORED BY 'carbondata'
- TBLPROPERTIES ('COLUMN_GROUPS'='(productNumber,saleQuantity)',
- 'DICTIONARY_EXCLUDE'='productName',
+ TBLPROPERTIES ('DICTIONARY_EXCLUDE'='productName',
'DICTIONARY_INCLUDE'='productNumber,saleQuantity',
'NO_INVERTED_INDEX'='productBatch',
'BUCKETNUMBER'='4',
[50/50] [abbrv] carbondata git commit: [CARBONDATA-1232] Datamap
implementation for Blocklet
Posted by ja...@apache.org.
[CARBONDATA-1232] Datamap implementation for Blocklet
This closes #1099
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b385d14b
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b385d14b
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b385d14b
Branch: refs/heads/datamap
Commit: b385d14b4f234210e0de35242a11900e63d48948
Parents: 9e4da2a
Author: ravipesala <ra...@gmail.com>
Authored: Sat Jun 17 22:53:57 2017 +0530
Committer: jackylk <ja...@huawei.com>
Committed: Wed Jul 12 23:02:02 2017 +0800
----------------------------------------------------------------------
.../carbondata/core/cache/CacheProvider.java | 3 +
.../apache/carbondata/core/cache/CacheType.java | 6 +
.../core/datastore/block/TableBlockInfo.java | 19 +
.../core/datastore/block/TaskBlockInfo.java | 4 +
.../carbondata/core/indexstore/Blocklet.java | 55 +-
.../indexstore/BlockletDataMapIndexStore.java | 180 ++++++
.../core/indexstore/BlockletDetailInfo.java | 117 ++++
.../carbondata/core/indexstore/DataMap.java | 8 +-
.../core/indexstore/DataMapFactory.java | 87 +++
.../core/indexstore/DataMapStoreManager.java | 90 ++-
.../carbondata/core/indexstore/DataMapType.java | 14 +-
.../TableBlockIndexUniqueIdentifier.java | 103 ++++
.../core/indexstore/TableDataMap.java | 97 +++-
.../core/indexstore/UnsafeMemoryDMStore.java | 207 +++++++
.../blockletindex/BlockletDMComparator.java | 134 +++++
.../blockletindex/BlockletDataMap.java | 445 +++++++++++++++
.../blockletindex/BlockletDataMapFactory.java | 115 ++++
.../BlockletDataRefNodeWrapper.java | 137 +++++
.../indexstore/blockletindex/IndexWrapper.java | 49 ++
.../core/indexstore/row/DataMapRow.java | 89 +++
.../core/indexstore/row/DataMapRowImpl.java | 106 ++++
.../core/indexstore/row/UnsafeDataMapRow.java | 133 +++++
.../core/indexstore/schema/DataMapSchema.java | 124 ++++
.../core/indexstore/schema/FilterType.java | 24 +
.../core/metadata/blocklet/BlockletInfo.java | 53 +-
.../core/metadata/index/BlockIndexInfo.java | 27 +
.../executor/impl/AbstractQueryExecutor.java | 52 +-
.../executer/IncludeFilterExecuterImpl.java | 2 +-
.../executer/RangeValueFilterExecuterImpl.java | 2 +-
.../RowLevelRangeGrtThanFiterExecuterImpl.java | 2 +-
...elRangeGrtrThanEquaToFilterExecuterImpl.java | 2 +-
...velRangeLessThanEqualFilterExecuterImpl.java | 2 +-
.../RowLevelRangeLessThanFiterExecuterImpl.java | 2 +-
.../processor/AbstractDataBlockIterator.java | 3 +
.../AbstractDetailQueryResultIterator.java | 34 +-
.../util/AbstractDataFileFooterConverter.java | 53 ++
.../apache/carbondata/core/util/CarbonUtil.java | 40 +-
.../core/util/DataFileFooterConverter.java | 4 +
.../core/util/DataFileFooterConverter2.java | 3 +
.../core/util/DataFileFooterConverterV3.java | 11 +
format/src/main/thrift/carbondata_index.thrift | 1 +
.../carbondata/hadoop/CarbonInputFormat.java | 14 +-
.../carbondata/hadoop/CarbonInputSplit.java | 39 +-
.../hadoop/api/CarbonTableInputFormat.java | 562 ++++++++++++++++---
.../hadoop/util/CarbonInputFormatUtil.java | 7 +-
.../presto/impl/CarbonTableReader.java | 56 +-
.../spark/rdd/CarbonIUDMergerRDD.scala | 5 +-
.../carbondata/spark/rdd/CarbonMergerRDD.scala | 9 +-
.../carbondata/spark/rdd/CarbonScanRDD.scala | 20 +-
.../carbondata/spark/util/QueryPlanUtil.scala | 10 +-
.../sql/CarbonDatasourceHadoopRelation.scala | 14 +-
.../sql/execution/command/IUDCommands.scala | 7 -
.../carbondata/spark/util/QueryPlanUtil.scala | 10 +-
.../sql/CarbonDatasourceHadoopRelation.scala | 5 +-
.../apache/spark/sql/hive/CarbonMetastore.scala | 10 +-
.../processing/merger/CarbonCompactionUtil.java | 32 ++
56 files changed, 3174 insertions(+), 265 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/cache/CacheProvider.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/cache/CacheProvider.java b/core/src/main/java/org/apache/carbondata/core/cache/CacheProvider.java
index 25a8976..5c4b265 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/CacheProvider.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/CacheProvider.java
@@ -31,6 +31,7 @@ import org.apache.carbondata.core.datastore.BlockIndexStore;
import org.apache.carbondata.core.datastore.SegmentTaskIndexStore;
import org.apache.carbondata.core.datastore.block.AbstractIndex;
import org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier;
+import org.apache.carbondata.core.indexstore.BlockletDataMapIndexStore;
import org.apache.carbondata.core.util.CarbonProperties;
/**
@@ -126,6 +127,8 @@ public class CacheProvider {
} else if (cacheType.equals(cacheType.DRIVER_BTREE)) {
cacheObject =
new SegmentTaskIndexStore(carbonStorePath, carbonLRUCache);
+ } else if (cacheType.equals(cacheType.DRIVER_BLOCKLET_DATAMAP)) {
+ cacheObject = new BlockletDataMapIndexStore(carbonStorePath, carbonLRUCache);
}
cacheTypeToCacheMap.put(cacheType, cacheObject);
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/cache/CacheType.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/cache/CacheType.java b/core/src/main/java/org/apache/carbondata/core/cache/CacheType.java
index 2d6570d..ab51ff2 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/CacheType.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/CacheType.java
@@ -56,6 +56,12 @@ public class CacheType<K, V> {
DRIVER_BTREE = new CacheType("driver_btree");
/**
+ * Driver blocklet datamap cache which maintains the blocklet datamap index of segments
+ */
+ public static final CacheType<TableSegmentUniqueIdentifier, SegmentTaskIndexWrapper>
+ DRIVER_BLOCKLET_DATAMAP = new CacheType("driver_blocklet_datamap");
+
+ /**
* cacheName which is unique name for a cache
*/
private String cacheName;
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
index 44347cf..f003882 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java
@@ -22,6 +22,7 @@ import java.util.Map;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
import org.apache.carbondata.core.util.ByteUtil;
import org.apache.carbondata.core.util.path.CarbonTablePath;
@@ -77,6 +78,8 @@ public class TableBlockInfo implements Distributable, Serializable {
*/
private String[] deletedDeltaFilePath;
+ private BlockletDetailInfo detailInfo;
+
public TableBlockInfo(String filePath, long blockOffset, String segmentId, String[] locations,
long blockLength, ColumnarFormatVersion version, String[] deletedDeltaFilePath) {
this.filePath = FileFactory.getUpdatedFilePath(filePath);
@@ -88,6 +91,10 @@ public class TableBlockInfo implements Distributable, Serializable {
this.deletedDeltaFilePath = deletedDeltaFilePath;
}
+ public TableBlockInfo() {
+
+ }
+
/**
* constructor to initialize the TableBlockInfo with BlockletInfos
*
@@ -319,4 +326,16 @@ public class TableBlockInfo implements Distributable, Serializable {
public String[] getDeletedDeltaFilePath() {
return deletedDeltaFilePath;
}
+
+ public void setFilePath(String filePath) {
+ this.filePath = filePath;
+ }
+
+ public BlockletDetailInfo getDetailInfo() {
+ return detailInfo;
+ }
+
+ public void setDetailInfo(BlockletDetailInfo detailInfo) {
+ this.detailInfo = detailInfo;
+ }
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/datastore/block/TaskBlockInfo.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/TaskBlockInfo.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/TaskBlockInfo.java
index eb707c2..4fcec87 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/block/TaskBlockInfo.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/TaskBlockInfo.java
@@ -17,6 +17,7 @@
package org.apache.carbondata.core.datastore.block;
+import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -45,6 +46,9 @@ public class TaskBlockInfo {
return taskBlockInfoMapping.keySet();
}
+ public Collection<List<TableBlockInfo>> getAllTableBlockInfoList() {
+ return taskBlockInfoMapping.values();
+ }
/**
* returns TableBlockInfoList of given task
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java b/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java
index 597c46c..66da4d0 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java
@@ -16,27 +16,76 @@
*/
package org.apache.carbondata.core.indexstore;
+import java.io.IOException;
import java.io.Serializable;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+
/**
* Blocklet
*/
public class Blocklet implements Serializable {
- private String path;
+ private Path path;
+
+ private String segmentId;
private String blockletId;
+ private BlockletDetailInfo detailInfo;
+
+ private long length;
+
+ private String[] location;
+
public Blocklet(String path, String blockletId) {
- this.path = path;
+ this.path = new Path(path);
this.blockletId = blockletId;
}
- public String getPath() {
+ public Path getPath() {
return path;
}
public String getBlockletId() {
return blockletId;
}
+
+ public BlockletDetailInfo getDetailInfo() {
+ return detailInfo;
+ }
+
+ public void setDetailInfo(BlockletDetailInfo detailInfo) {
+ this.detailInfo = detailInfo;
+ }
+
+ public void updateLocations() throws IOException {
+ FileSystem fs = path.getFileSystem(FileFactory.getConfiguration());
+ RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path);
+ LocatedFileStatus fileStatus = iter.next();
+ location = fileStatus.getBlockLocations()[0].getHosts();
+ length = fileStatus.getLen();
+ }
+
+ public String[] getLocations() throws IOException {
+ return location;
+ }
+
+ public long getLength() throws IOException {
+ return length;
+ }
+
+ public String getSegmentId() {
+ return segmentId;
+ }
+
+ public void setSegmentId(String segmentId) {
+ this.segmentId = segmentId;
+ }
+
}
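
updateLocations() resolves the hosts and length of the underlying file through the Hadoop FileSystem API. A hedged standalone sketch of the same calls (the path argument and the local Configuration are assumptions for illustration):

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.LocatedFileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.RemoteIterator;

    // Look up the hosts serving the first block of a file, as
    // Blocklet.updateLocations() does above.
    public class LocationLookup {
      public static void main(String[] args) throws IOException {
        Path path = new Path(args[0]);
        FileSystem fs = path.getFileSystem(new Configuration());
        RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path);
        if (iter.hasNext()) {
          LocatedFileStatus status = iter.next();
          System.out.println("length = " + status.getLen());
          if (status.getBlockLocations().length > 0) {
            for (String host : status.getBlockLocations()[0].getHosts()) {
              System.out.println("host = " + host);
            }
          }
        }
      }
    }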
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
new file mode 100644
index 0000000..fc8c273
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.carbondata.common.logging.LogService;
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.cache.Cache;
+import org.apache.carbondata.core.cache.CarbonLRUCache;
+import org.apache.carbondata.core.datastore.exception.IndexBuilderException;
+import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMap;
+
+/**
+ * Class to handle loading, unloading, clearing and storing of the table
+ * blocklet datamaps
+ */
+public class BlockletDataMapIndexStore
+ implements Cache<TableBlockIndexUniqueIdentifier, BlockletDataMap> {
+ private static final LogService LOGGER =
+ LogServiceFactory.getLogService(BlockletDataMapIndexStore.class.getName());
+ /**
+ * carbon store path
+ */
+ protected String carbonStorePath;
+ /**
+ * CarbonLRU cache
+ */
+ protected CarbonLRUCache lruCache;
+
+ /**
+ * map of segment identifier to lock object. It is filled while loading a
+ * datamap and the lock is held only for that particular segment, so that a
+ * segment-level lock is applied and other segments can be loaded
+ * concurrently
+ */
+ private Map<String, Object> segmentLockMap;
+
+ /**
+ * constructor to initialize the BlockletDataMapIndexStore
+ *
+ * @param carbonStorePath
+ * @param lruCache
+ */
+ public BlockletDataMapIndexStore(String carbonStorePath, CarbonLRUCache lruCache) {
+ this.carbonStorePath = carbonStorePath;
+ this.lruCache = lruCache;
+ segmentLockMap = new ConcurrentHashMap<String, Object>();
+ }
+
+ @Override public BlockletDataMap get(TableBlockIndexUniqueIdentifier tableSegmentUniqueIdentifier)
+ throws IOException {
+ String lruCacheKey = tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier();
+ BlockletDataMap dataMap = (BlockletDataMap) lruCache.get(lruCacheKey);
+ if (dataMap == null) {
+ try {
+ dataMap = loadAndGetDataMap(tableSegmentUniqueIdentifier);
+ } catch (IndexBuilderException e) {
+ throw new IOException(e.getMessage(), e);
+ } catch (Throwable e) {
+ throw new IOException("Problem in loading segment block.", e);
+ }
+ }
+ return dataMap;
+ }
+
+ @Override public List<BlockletDataMap> getAll(
+ List<TableBlockIndexUniqueIdentifier> tableSegmentUniqueIdentifiers) throws IOException {
+ List<BlockletDataMap> blockletDataMaps = new ArrayList<>(tableSegmentUniqueIdentifiers.size());
+ try {
+ for (TableBlockIndexUniqueIdentifier identifier : tableSegmentUniqueIdentifiers) {
+ blockletDataMaps.add(get(identifier));
+ }
+ } catch (Throwable e) {
+ for (BlockletDataMap dataMap : blockletDataMaps) {
+ dataMap.clear();
+ }
+ throw new IOException("Problem in loading segment blocks.", e);
+ }
+ return blockletDataMaps;
+ }
+
+ /**
+ * returns the BlockletDataMap from the cache, or null if it is not present
+ *
+ * @param tableSegmentUniqueIdentifier
+ * @return
+ */
+ @Override public BlockletDataMap getIfPresent(
+ TableBlockIndexUniqueIdentifier tableSegmentUniqueIdentifier) {
+ BlockletDataMap dataMap = (BlockletDataMap) lruCache
+ .get(tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier());
+ return dataMap;
+ }
+
+ /**
+ * method invalidate the segment cache for segment
+ *
+ * @param tableSegmentUniqueIdentifier
+ */
+ @Override public void invalidate(TableBlockIndexUniqueIdentifier tableSegmentUniqueIdentifier) {
+ lruCache.remove(tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier());
+ }
+
+ /**
+ * Below method will be used to load the blocklet datamap for the given
+ * index file. Loading is synchronized on a segment-level lock so that the
+ * same datamap is not built twice by concurrent callers.
+ *
+ * @return the loaded BlockletDataMap
+ * @throws IOException
+ */
+ private BlockletDataMap loadAndGetDataMap(
+ TableBlockIndexUniqueIdentifier tableSegmentUniqueIdentifier) throws IOException {
+ String uniqueTableSegmentIdentifier =
+ tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier();
+ Object lock = segmentLockMap.get(uniqueTableSegmentIdentifier);
+ if (lock == null) {
+ lock = addAndGetSegmentLock(uniqueTableSegmentIdentifier);
+ }
+ BlockletDataMap dataMap = null;
+ synchronized (lock) {
+ dataMap = new BlockletDataMap();
+ dataMap.init(tableSegmentUniqueIdentifier.getFilePath());
+ lruCache.put(tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier(), dataMap,
+ dataMap.getMemorySize());
+ }
+ return dataMap;
+ }
+
+ /**
+ * Below method will be used to get the segment level lock object
+ *
+ * @param uniqueIdentifier
+ * @return lock object
+ */
+ private synchronized Object addAndGetSegmentLock(String uniqueIdentifier) {
+ // get the segment lock object if it is present, otherwise add a new
+ // lock object and return it
+ Object segmentLoaderLockObject = segmentLockMap.get(uniqueIdentifier);
+ if (null == segmentLoaderLockObject) {
+ segmentLoaderLockObject = new Object();
+ segmentLockMap.put(uniqueIdentifier, segmentLoaderLockObject);
+ }
+ return segmentLoaderLockObject;
+ }
+
+ /**
+ * The method clears the cached datamaps of the given segments
+ *
+ * @param tableSegmentUniqueIdentifiers
+ */
+ @Override public void clearAccessCount(
+ List<TableBlockIndexUniqueIdentifier> tableSegmentUniqueIdentifiers) {
+ for (TableBlockIndexUniqueIdentifier segmentUniqueIdentifier : tableSegmentUniqueIdentifiers) {
+ BlockletDataMap cacheable =
+ (BlockletDataMap) lruCache.get(segmentUniqueIdentifier.getUniqueTableSegmentIdentifier());
+ cacheable.clear();
+ }
+ }
+}
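
loadAndGetDataMap() serializes concurrent loads of the same segment through a per-segment lock object while letting different segments load in parallel. The generic shape of that pattern, as a sketch (with a double-check under the lock added for illustration):

    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;

    // Per-key locking: loads of the same key serialize on one lock object,
    // different keys load in parallel, as in addAndGetSegmentLock() above.
    public class PerKeyLoader<K, V> {
      public interface Loader<K, V> { V load(K key); }

      private final Map<K, V> cache = new ConcurrentHashMap<>();
      private final Map<K, Object> locks = new ConcurrentHashMap<>();

      public V get(K key, Loader<K, V> loader) {
        V value = cache.get(key);
        if (value == null) {
          Object lock = locks.computeIfAbsent(key, k -> new Object());
          synchronized (lock) {
            value = cache.get(key);        // re-check under the lock
            if (value == null) {
              value = loader.load(key);    // expensive build happens once
              cache.put(key, value);
            }
          }
        }
        return value;
      }
    }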
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDetailInfo.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDetailInfo.java b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDetailInfo.java
new file mode 100644
index 0000000..68dedd8
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDetailInfo.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
+
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Blocklet detail information to be sent to each executor
+ */
+public class BlockletDetailInfo implements Serializable, Writable {
+
+ private int rowCount;
+
+ private short pagesCount;
+
+ private short versionNumber;
+
+ private int[] dimLens;
+
+ private long schemaUpdatedTimeStamp;
+
+ private BlockletInfo blockletInfo;
+
+ public int getRowCount() {
+ return rowCount;
+ }
+
+ public void setRowCount(int rowCount) {
+ this.rowCount = rowCount;
+ }
+
+ public int getPagesCount() {
+ return pagesCount;
+ }
+
+ public void setPagesCount(short pagesCount) {
+ this.pagesCount = pagesCount;
+ }
+
+ public short getVersionNumber() {
+ return versionNumber;
+ }
+
+ public void setVersionNumber(short versionNumber) {
+ this.versionNumber = versionNumber;
+ }
+
+ public BlockletInfo getBlockletInfo() {
+ return blockletInfo;
+ }
+
+ public void setBlockletInfo(BlockletInfo blockletInfo) {
+ this.blockletInfo = blockletInfo;
+ }
+
+ public int[] getDimLens() {
+ return dimLens;
+ }
+
+ public void setDimLens(int[] dimLens) {
+ this.dimLens = dimLens;
+ }
+
+ public long getSchemaUpdatedTimeStamp() {
+ return schemaUpdatedTimeStamp;
+ }
+
+ public void setSchemaUpdatedTimeStamp(long schemaUpdatedTimeStamp) {
+ this.schemaUpdatedTimeStamp = schemaUpdatedTimeStamp;
+ }
+
+ @Override public void write(DataOutput out) throws IOException {
+ out.writeInt(rowCount);
+ out.writeShort(pagesCount);
+ out.writeShort(versionNumber);
+ out.writeShort(dimLens.length);
+ for (int i = 0; i < dimLens.length; i++) {
+ out.writeInt(dimLens[i]);
+ }
+ out.writeLong(schemaUpdatedTimeStamp);
+ blockletInfo.write(out);
+ }
+
+ @Override public void readFields(DataInput in) throws IOException {
+ rowCount = in.readInt();
+ pagesCount = in.readShort();
+ versionNumber = in.readShort();
+ dimLens = new int[in.readShort()];
+ for (int i = 0; i < dimLens.length; i++) {
+ dimLens[i] = in.readInt();
+ }
+ schemaUpdatedTimeStamp = in.readLong();
+ blockletInfo = new BlockletInfo();
+ blockletInfo.readFields(in);
+ }
+}
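
Because BlockletDetailInfo implements Hadoop's Writable, it can be serialized into an input split and rebuilt on the executor. A round-trip helper for any Writable, as a sketch:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.hadoop.io.Writable;

    // Serialize a Writable to bytes and populate a fresh instance from them,
    // the way BlockletDetailInfo travels inside a split.
    public class WritableRoundTrip {
      public static byte[] serialize(Writable w) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        w.write(new DataOutputStream(bytes));   // write(DataOutput)
        return bytes.toByteArray();
      }

      public static <T extends Writable> T deserialize(byte[] data, T target)
          throws IOException {
        target.readFields(new DataInputStream(new ByteArrayInputStream(data)));
        return target;                          // fields populated from the stream
      }
    }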
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMap.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/DataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMap.java
index 2651f15..1276494 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/DataMap.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMap.java
@@ -21,7 +21,7 @@ import java.util.List;
import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
/**
- * Interface for adding and retrieving index data.
+ * Datamap is an entity which can store and retrieve index data.
*/
public interface DataMap {
@@ -47,6 +47,12 @@ public interface DataMap {
List<Blocklet> prune(FilterResolverIntf filterExp);
/**
+ * Convert datamap to distributable object
+ * @return
+ */
+ DataMapDistributable toDistributable();
+
+ /**
* Clear complete index table and release memory.
*/
void clear();
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapFactory.java b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapFactory.java
new file mode 100644
index 0000000..72f714f
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapFactory.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore;
+
+import java.util.List;
+
+import org.apache.carbondata.core.events.ChangeEvent;
+import org.apache.carbondata.core.indexstore.schema.FilterType;
+import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
+
+/**
+ * Interface for the datamap factory; it is responsible for creating datamaps.
+ */
+public interface DataMapFactory {
+
+ /**
+ * Initialization of Datamap factory
+ * @param identifier
+ * @param dataMapName
+ */
+ void init(AbsoluteTableIdentifier identifier, String dataMapName);
+ /**
+ * Get the datamap writer for each segmentid.
+ *
+ * @param identifier
+ * @param segmentId
+ * @return
+ */
+ DataMapWriter getDataMapWriter(AbsoluteTableIdentifier identifier,
+ String segmentId);
+
+ /**
+ * Get the datamap for segmentid
+ *
+ * @param segmentId
+ * @return
+ */
+ List<DataMap> getDataMaps(String segmentId);
+
+ /**
+ * Get datamap for distributable object.
+ *
+ * @param distributable
+ * @return
+ */
+ DataMap getDataMap(DataMapDistributable distributable);
+
+ /**
+ * This method checks whether the columns and the type of filters are
+ * supported by this datamap or not
+ *
+ * @param filterType
+ * @return
+ */
+ boolean isFiltersSupported(FilterType filterType);
+
+ /**
+ * Passes a table change event to the datamap implementation.
+ * @param event
+ */
+ void fireEvent(ChangeEvent event);
+
+ /**
+ * Clears datamap of the segment
+ */
+ void clear(String segmentId);
+
+ /**
+ * Clear all datamaps from memory
+ */
+ void clear();
+
+}
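
For orientation, a minimal no-op implementation of this contract might look like the sketch below; it keeps no index at all, so every prune would fall back to a full scan. This is illustrative only; the real implementation in this commit is BlockletDataMapFactory.

    import java.util.Collections;
    import java.util.List;

    import org.apache.carbondata.core.events.ChangeEvent;
    import org.apache.carbondata.core.indexstore.DataMap;
    import org.apache.carbondata.core.indexstore.DataMapDistributable;
    import org.apache.carbondata.core.indexstore.DataMapFactory;
    import org.apache.carbondata.core.indexstore.DataMapWriter;
    import org.apache.carbondata.core.indexstore.schema.FilterType;
    import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;

    // Skeleton factory: no per-segment datamaps, no filter support.
    public class NoOpDataMapFactory implements DataMapFactory {
      @Override public void init(AbsoluteTableIdentifier identifier, String dataMapName) {
        // nothing to initialize for a no-op index
      }

      @Override public DataMapWriter getDataMapWriter(AbsoluteTableIdentifier identifier,
          String segmentId) {
        return null;                       // nothing is written at load time
      }

      @Override public List<DataMap> getDataMaps(String segmentId) {
        return Collections.emptyList();    // no datamaps kept per segment
      }

      @Override public DataMap getDataMap(DataMapDistributable distributable) {
        return null;
      }

      @Override public boolean isFiltersSupported(FilterType filterType) {
        return false;                      // no filter pushdown supported
      }

      @Override public void fireEvent(ChangeEvent event) { }

      @Override public void clear(String segmentId) { }

      @Override public void clear() { }
    }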
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapStoreManager.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapStoreManager.java b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapStoreManager.java
index 06638ad..1a36187 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapStoreManager.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapStoreManager.java
@@ -16,7 +16,9 @@
*/
package org.apache.carbondata.core.indexstore;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import org.apache.carbondata.common.logging.LogService;
@@ -24,13 +26,16 @@ import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
/**
- * It maintains all the index tables in it.
+ * It maintains all the DataMaps in it.
*/
-public class DataMapStoreManager {
+public final class DataMapStoreManager {
private static DataMapStoreManager instance = new DataMapStoreManager();
- private Map<DataMapType, Map<String, TableDataMap>> dataMapMappping = new HashMap<>();
+ /**
+ * Contains the list of datamaps for each table.
+ */
+ private Map<AbsoluteTableIdentifier, List<TableDataMap>> dataMapMappping = new HashMap<>();
private static final LogService LOGGER =
LogServiceFactory.getLogService(DataMapStoreManager.class.getName());
@@ -48,56 +53,85 @@ public class DataMapStoreManager {
*/
public TableDataMap getDataMap(AbsoluteTableIdentifier identifier, String dataMapName,
DataMapType mapType) {
- Map<String, TableDataMap> map = dataMapMappping.get(mapType);
- TableDataMap dataMap = null;
- if (map == null) {
+ List<TableDataMap> tableDataMaps = dataMapMappping.get(identifier);
+ TableDataMap dataMap;
+ if (tableDataMaps == null) {
+ createTableDataMap(identifier, mapType, dataMapName);
+ tableDataMaps = dataMapMappping.get(identifier);
+ }
+ dataMap = getAbstractTableDataMap(dataMapName, tableDataMaps);
+ if (dataMap == null) {
throw new RuntimeException("Datamap does not exist");
- } else {
- dataMap = map.get(dataMapName);
- if (dataMap == null) {
- throw new RuntimeException("Datamap does not exist");
- }
}
- // Initialize datamap
- dataMap.init(identifier, dataMapName);
return dataMap;
}
/**
- * Create new datamap instance using datamap type and path
+ * Create new datamap instance using datamap name, datamap type and table identifier
*
* @param mapType
* @return
*/
- public TableDataMap createTableDataMap(AbsoluteTableIdentifier identifier, DataMapType mapType,
- String dataMapName) {
- Map<String, TableDataMap> map = dataMapMappping.get(mapType);
- if (map == null) {
- map = new HashMap<>();
- dataMapMappping.put(mapType, map);
+ private TableDataMap createTableDataMap(AbsoluteTableIdentifier identifier,
+ DataMapType mapType, String dataMapName) {
+ List<TableDataMap> tableDataMaps = dataMapMappping.get(identifier);
+ if (tableDataMaps == null) {
+ tableDataMaps = new ArrayList<>();
+ dataMapMappping.put(identifier, tableDataMaps);
}
- TableDataMap dataMap = map.get(dataMapName);
+ TableDataMap dataMap = getAbstractTableDataMap(dataMapName, tableDataMaps);
if (dataMap != null) {
throw new RuntimeException("Already datamap exists in that path with type " + mapType);
}
try {
- //TODO create datamap using @mapType.getClassName())
+ DataMapFactory dataMapFactory = mapType.getClassObject().newInstance();
+ dataMapFactory.init(identifier, dataMapName);
+ dataMap = new TableDataMap(identifier, dataMapName, dataMapFactory);
} catch (Exception e) {
LOGGER.error(e);
+ throw new RuntimeException(e);
+ }
+ tableDataMaps.add(dataMap);
+ return dataMap;
+ }
+
+ private TableDataMap getAbstractTableDataMap(String dataMapName,
+ List<TableDataMap> tableDataMaps) {
+ TableDataMap dataMap = null;
+ for (TableDataMap tableDataMap: tableDataMaps) {
+ if (tableDataMap.getDataMapName().equals(dataMapName)) {
+ dataMap = tableDataMap;
+ break;
+ }
}
- dataMap.init(identifier, dataMapName);
- map.put(dataMapName, dataMap);
return dataMap;
}
- public void clearDataMap(String dataMapName, DataMapType mapType) {
- Map<String, TableDataMap> map = dataMapMappping.get(mapType);
- if (map != null && map.get(dataMapName) != null) {
- map.remove(dataMapName).clear();
+ /**
+ * Clear the datamap with the given name for the given table from memory
+ * @param identifier
+ * @param dataMapName
+ */
+ public void clearDataMap(AbsoluteTableIdentifier identifier, String dataMapName) {
+ List<TableDataMap> tableDataMaps = dataMapMappping.get(identifier);
+ if (tableDataMaps != null) {
+ int i = 0;
+ for (TableDataMap tableDataMap: tableDataMaps) {
+ if (tableDataMap != null && dataMapName.equals(tableDataMap.getDataMapName())) {
+ tableDataMap.clear(new ArrayList<String>());
+ tableDataMaps.remove(i);
+ break;
+ }
+ i++;
+ }
}
}
+ /**
+ * Returns the singleton instance
+ * @return
+ */
public static DataMapStoreManager getInstance() {
return instance;
}
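
With this change, a caller resolves (and lazily creates) a table's datamap through the singleton and prunes with it. A usage sketch based on the signatures above; the datamap name and segment ids are placeholders:

    import java.util.Arrays;
    import java.util.List;

    import org.apache.carbondata.core.indexstore.Blocklet;
    import org.apache.carbondata.core.indexstore.DataMapStoreManager;
    import org.apache.carbondata.core.indexstore.DataMapType;
    import org.apache.carbondata.core.indexstore.TableDataMap;
    import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
    import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;

    // Fetch the blocklet datamap of a table and prune two segments.
    // "identifier" and "filter" would come from the query planner.
    public class DataMapUsageSketch {
      static List<Blocklet> pruneSegments(AbsoluteTableIdentifier identifier,
          FilterResolverIntf filter) {
        TableDataMap dataMap = DataMapStoreManager.getInstance()
            .getDataMap(identifier, "myBlockletDataMap", DataMapType.BLOCKLET);
        return dataMap.prune(Arrays.asList("0", "1"), filter);
      }
    }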
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapType.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapType.java b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapType.java
index b6a0f5b..0059b29 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapType.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapType.java
@@ -16,19 +16,21 @@
*/
package org.apache.carbondata.core.indexstore;
+import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory;
+
/**
* Datamap type
*/
public enum DataMapType {
- BLOCKLET("org.apache.carbondata.datamap.BlockletDataMap");
+ BLOCKLET(BlockletDataMapFactory.class);
- private String className;
+ private Class<? extends DataMapFactory> classObject;
- DataMapType(String className) {
- this.className = className;
+ DataMapType(Class<? extends DataMapFactory> classObject) {
+ this.classObject = classObject;
}
- public String getClassName() {
- return className;
+ public Class<? extends DataMapFactory> getClassObject() {
+ return classObject;
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/TableBlockIndexUniqueIdentifier.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/TableBlockIndexUniqueIdentifier.java b/core/src/main/java/org/apache/carbondata/core/indexstore/TableBlockIndexUniqueIdentifier.java
new file mode 100644
index 0000000..7e2bc0e
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/TableBlockIndexUniqueIdentifier.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.core.indexstore;
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
+import org.apache.carbondata.core.metadata.CarbonTableIdentifier;
+
+/**
+ * Class holds the absoluteTableIdentifier, segmentId and index file name to uniquely
+ * identify an index file of a segment
+ */
+public class TableBlockIndexUniqueIdentifier {
+ /**
+ * table fully qualified identifier
+ */
+ private AbsoluteTableIdentifier absoluteTableIdentifier;
+
+ private String segmentId;
+
+ private String carbonIndexFileName;
+
+ /**
+ * Constructor to initialize the class instance
+ *
+ * @param absoluteTableIdentifier
+ * @param segmentId
+ */
+ public TableBlockIndexUniqueIdentifier(AbsoluteTableIdentifier absoluteTableIdentifier,
+ String segmentId, String carbonIndexFileName) {
+ this.absoluteTableIdentifier = absoluteTableIdentifier;
+ this.segmentId = segmentId;
+ this.carbonIndexFileName = carbonIndexFileName;
+ }
+
+ /**
+ * returns AbsoluteTableIdentifier
+ *
+ * @return
+ */
+ public AbsoluteTableIdentifier getAbsoluteTableIdentifier() {
+ return absoluteTableIdentifier;
+ }
+
+ public String getSegmentId() {
+ return segmentId;
+ }
+
+ /**
+ * method returns the key that uniquely identifies this index file in the cache
+ *
+ * @return
+ */
+ public String getUniqueTableSegmentIdentifier() {
+ CarbonTableIdentifier carbonTableIdentifier =
+ absoluteTableIdentifier.getCarbonTableIdentifier();
+ return carbonTableIdentifier.getDatabaseName() + CarbonCommonConstants.FILE_SEPARATOR
+ + carbonTableIdentifier.getTableName() + CarbonCommonConstants.UNDERSCORE
+ + carbonTableIdentifier.getTableId() + CarbonCommonConstants.FILE_SEPARATOR + segmentId
+ + CarbonCommonConstants.FILE_SEPARATOR + carbonIndexFileName;
+ }
+
+ public String getFilePath() {
+ return absoluteTableIdentifier.getTablePath() + "/Fact/Part0/Segment_" + segmentId + "/"
+ + carbonIndexFileName;
+ }
+
+ @Override public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ TableBlockIndexUniqueIdentifier that = (TableBlockIndexUniqueIdentifier) o;
+
+ if (!absoluteTableIdentifier.equals(that.absoluteTableIdentifier)) {
+ return false;
+ }
+ if (!segmentId.equals(that.segmentId)) {
+ return false;
+ }
+ return carbonIndexFileName.equals(that.carbonIndexFileName);
+ }
+
+ @Override public int hashCode() {
+ int result = absoluteTableIdentifier.hashCode();
+ result = 31 * result + segmentId.hashCode();
+ result = 31 * result + carbonIndexFileName.hashCode();
+ return result;
+ }
+}
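
To see how the cache key and index-file path fall out of this class, a sketch; the AbsoluteTableIdentifier and CarbonTableIdentifier constructors and the index file name are assumptions for illustration and may differ from the actual code base:

    import org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier;
    import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
    import org.apache.carbondata.core.metadata.CarbonTableIdentifier;

    public class IdentifierSketch {
      public static void main(String[] args) {
        // constructors assumed; the index file name is a made-up example
        CarbonTableIdentifier table = new CarbonTableIdentifier("default", "t1", "100");
        AbsoluteTableIdentifier absolute = new AbsoluteTableIdentifier("/store", table);
        TableBlockIndexUniqueIdentifier id =
            new TableBlockIndexUniqueIdentifier(absolute, "0", "0.carbonindex");
        // key shape: default/t1_100/0/0.carbonindex
        System.out.println(id.getUniqueTableSegmentIdentifier());
        // path shape: <tablePath>/Fact/Part0/Segment_0/0.carbonindex
        System.out.println(id.getFilePath());
      }
    }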
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/TableDataMap.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/TableDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/TableDataMap.java
index e1532c8..39ca4c5 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/TableDataMap.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/TableDataMap.java
@@ -16,38 +16,34 @@
*/
package org.apache.carbondata.core.indexstore;
+import java.util.ArrayList;
import java.util.List;
+import org.apache.carbondata.core.events.ChangeEvent;
import org.apache.carbondata.core.events.EventListener;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
-
/**
* DataMap at the table level; a user can add any number of datamaps for one table. Depending
* on the filter condition it can prune the blocklets.
*/
-public interface TableDataMap extends EventListener {
+public final class TableDataMap implements EventListener {
- /**
- * It is called to initialize and load the required table datamap metadata.
- */
- void init(AbsoluteTableIdentifier identifier, String dataMapName);
+ private AbsoluteTableIdentifier identifier;
- /**
- * Gives the writer to write the metadata information of this datamap at table level.
- *
- * @return
- */
- DataMapWriter getWriter();
+ private String dataMapName;
+
+ private DataMapFactory dataMapFactory;
/**
- * Create the datamap using the segmentid and name.
- *
- * @param identifier
- * @param segmentId
- * @return
+ * It is called to initialize and load the required table datamap metadata.
*/
- DataMap createDataMap(AbsoluteTableIdentifier identifier, String segmentId);
+ public TableDataMap(AbsoluteTableIdentifier identifier, String dataMapName,
+ DataMapFactory dataMapFactory) {
+ this.identifier = identifier;
+ this.dataMapName = dataMapName;
+ this.dataMapFactory = dataMapFactory;
+ }
/**
* Pass the valid segments and prune the datamap using filter expression
@@ -56,7 +52,24 @@ public interface TableDataMap extends EventListener {
* @param filterExp
* @return
*/
- List<Blocklet> prune(List<String> segmentIds, FilterResolverIntf filterExp);
+ public List<Blocklet> prune(List<String> segmentIds, FilterResolverIntf filterExp) {
+ List<Blocklet> blocklets = new ArrayList<>();
+ for (String segmentId : segmentIds) {
+ List<DataMap> dataMaps = dataMapFactory.getDataMaps(segmentId);
+ for (DataMap dataMap : dataMaps) {
+ List<Blocklet> pruneBlocklets = dataMap.prune(filterExp);
+ blocklets.addAll(addSegmentId(pruneBlocklets, segmentId));
+ }
+ }
+ return blocklets;
+ }
+
+ private List<Blocklet> addSegmentId(List<Blocklet> pruneBlocklets, String segmentId) {
+ for (Blocklet blocklet : pruneBlocklets) {
+ blocklet.setSegmentId(segmentId);
+ }
+ return pruneBlocklets;
+ }
/**
* This is used for making the datamap distributable.
@@ -65,7 +78,16 @@ public interface TableDataMap extends EventListener {
*
* @return
*/
- List<DataMapDistributable> toDistributable(List<String> segmentIds);
+ public List<DataMapDistributable> toDistributable(List<String> segmentIds) {
+ List<DataMapDistributable> distributables = new ArrayList<>();
+ for (String segmentsId : segmentIds) {
+ List<DataMap> dataMaps = dataMapFactory.getDataMaps(segmentsId);
+ for (DataMap dataMap : dataMaps) {
+ distributables.add(dataMap.toDistributable());
+ }
+ }
+ return distributables;
+ }
/**
* This method is used from any machine after it is distributed. It takes the distributable object
@@ -75,20 +97,37 @@ public interface TableDataMap extends EventListener {
* @param filterExp
* @return
*/
- List<Blocklet> prune(DataMapDistributable distributable, FilterResolverIntf filterExp);
+ public List<Blocklet> prune(DataMapDistributable distributable, FilterResolverIntf filterExp) {
+ return dataMapFactory.getDataMap(distributable).prune(filterExp);
+ }
+
+ @Override public void fireEvent(ChangeEvent event) {
+ dataMapFactory.fireEvent(event);
+ }
/**
- * This method checks whether the columns and the type of filters supported
- * for this datamap or not
- *
- * @param filterExp
- * @return
+ * Clear only the datamaps of the segments
+ * @param segmentIds
*/
- boolean isFiltersSupported(FilterResolverIntf filterExp);
+ public void clear(List<String> segmentIds) {
+ for (String segmentId: segmentIds) {
+ dataMapFactory.clear(segmentId);
+ }
+ }
/**
- * Clears table level datamap
+ * Clears all datamaps
+ */
+ public void clear() {
+ dataMapFactory.clear();
+ }
+ /**
+ * Get the unique name of datamap
+ *
+ * @return
*/
- void clear();
+ public String getDataMapName() {
+ return dataMapName;
+ }
}
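
The two prune() overloads split the work between driver and executor: the driver turns segments into distributable pieces, and each executor prunes its own piece. Sketched sequentially below; in a distributed run each distributable would become one task:

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.carbondata.core.indexstore.Blocklet;
    import org.apache.carbondata.core.indexstore.DataMapDistributable;
    import org.apache.carbondata.core.indexstore.TableDataMap;
    import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;

    public class DistributedPruneSketch {
      static List<Blocklet> prune(TableDataMap dataMap, List<String> segments,
          FilterResolverIntf filter) {
        List<DataMapDistributable> pieces = dataMap.toDistributable(segments); // driver
        List<Blocklet> blocklets = new ArrayList<>();
        for (DataMapDistributable piece : pieces) {
          blocklets.addAll(dataMap.prune(piece, filter));                      // executor
        }
        return blocklets;
      }
    }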
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java
new file mode 100644
index 0000000..8246f99
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore;
+
+import org.apache.carbondata.core.indexstore.row.DataMapRow;
+import org.apache.carbondata.core.indexstore.row.UnsafeDataMapRow;
+import org.apache.carbondata.core.indexstore.schema.DataMapSchema;
+import org.apache.carbondata.core.memory.MemoryAllocator;
+import org.apache.carbondata.core.memory.MemoryAllocatorFactory;
+import org.apache.carbondata.core.memory.MemoryBlock;
+
+import static org.apache.carbondata.core.memory.CarbonUnsafe.BYTE_ARRAY_OFFSET;
+import static org.apache.carbondata.core.memory.CarbonUnsafe.unsafe;
+
+/**
+ * Store the datamap row {@link DataMapRow} data in unsafe memory.
+ */
+public class UnsafeMemoryDMStore {
+
+ private MemoryBlock memoryBlock;
+
+ private static int capacity = 8 * 1024 * 1024;
+
+ private int allocatedSize;
+
+ private int runningLength;
+
+ private MemoryAllocator memoryAllocator;
+
+ private boolean isMemoryFreed;
+
+ private DataMapSchema[] schema;
+
+ private int[] pointers;
+
+ private int rowCount;
+
+ public UnsafeMemoryDMStore(DataMapSchema[] schema) {
+ this.schema = schema;
+ this.memoryAllocator = MemoryAllocatorFactory.INSATANCE.getMemoryAllocator();
+ this.allocatedSize = capacity;
+ this.memoryBlock = memoryAllocator.allocate(allocatedSize);
+ this.pointers = new int[1000];
+ }
+
+ /**
+ * Check whether the allocated memory is sufficient; if not, allocate a larger block and
+ * copy the old data into it.
+ *
+ * @param rowSize
+ */
+ private void ensureSize(int rowSize) {
+ if (runningLength + rowSize >= allocatedSize) {
+ MemoryBlock allocate =
+ MemoryAllocatorFactory.INSATANCE.getMemoryAllocator().allocate(allocatedSize + capacity);
+ unsafe.copyMemory(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset(),
+ allocate.getBaseObject(), allocate.getBaseOffset(), runningLength);
+ memoryAllocator.free(memoryBlock);
+ allocatedSize = allocatedSize + capacity;
+ memoryBlock = allocate;
+ }
+ if (this.pointers.length <= rowCount + 1) {
+ int[] newPointer = new int[pointers.length + 1000];
+ System.arraycopy(pointers, 0, newPointer, 0, pointers.length);
+ this.pointers = newPointer;
+ }
+ }
+
+ /**
+ * Add the index row to unsafe.
+ *
+ * @param indexRow
+ */
+ public void addIndexRowToUnsafe(DataMapRow indexRow) {
+ // First calculate the required memory to keep the row in unsafe
+ int rowSize = indexRow.getTotalSizeInBytes();
+ // Check whether allocated memory is sufficient or not.
+ ensureSize(rowSize);
+ int pointer = runningLength;
+
+ for (int i = 0; i < schema.length; i++) {
+ addToUnsafe(schema[i], indexRow, i);
+ }
+ pointers[rowCount++] = pointer;
+ }
+
+ private void addToUnsafe(DataMapSchema schema, DataMapRow row, int index) {
+ switch (schema.getSchemaType()) {
+ case FIXED:
+ switch (schema.getDataType()) {
+ case BYTE:
+ unsafe.putByte(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + runningLength,
+ row.getByte(index));
+ runningLength += row.getSizeInBytes(index);
+ break;
+ case SHORT:
+ unsafe
+ .putShort(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + runningLength,
+ row.getShort(index));
+ runningLength += row.getSizeInBytes(index);
+ break;
+ case INT:
+ unsafe.putInt(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + runningLength,
+ row.getInt(index));
+ runningLength += row.getSizeInBytes(index);
+ break;
+ case LONG:
+ unsafe.putLong(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + runningLength,
+ row.getLong(index));
+ runningLength += row.getSizeInBytes(index);
+ break;
+ case FLOAT:
+ unsafe
+ .putFloat(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + runningLength,
+ row.getFloat(index));
+ runningLength += row.getSizeInBytes(index);
+ break;
+ case DOUBLE:
+ unsafe
+ .putDouble(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + runningLength,
+ row.getDouble(index));
+ runningLength += row.getSizeInBytes(index);
+ break;
+ case BYTE_ARRAY:
+ byte[] data = row.getByteArray(index);
+ unsafe.copyMemory(data, BYTE_ARRAY_OFFSET, memoryBlock.getBaseObject(),
+ memoryBlock.getBaseOffset() + runningLength, data.length);
+ runningLength += row.getSizeInBytes(index);
+ break;
+ }
+ break;
+ case VARIABLE:
+ byte[] data = row.getByteArray(index);
+ unsafe.putShort(memoryBlock.getBaseObject(),
+ memoryBlock.getBaseOffset() + runningLength, (short) data.length);
+ runningLength += 2;
+ unsafe.copyMemory(data, BYTE_ARRAY_OFFSET, memoryBlock.getBaseObject(),
+ memoryBlock.getBaseOffset() + runningLength, data.length);
+ runningLength += data.length;
+ break;
+ case STRUCT:
+ DataMapSchema[] childSchemas =
+ ((DataMapSchema.StructDataMapSchema) schema).getChildSchemas();
+ DataMapRow struct = row.getRow(index);
+ for (int i = 0; i < childSchemas.length; i++) {
+ addToUnsafe(childSchemas[i], struct, i);
+ }
+ break;
+ }
+ }
+
+ public DataMapRow getUnsafeRow(int index) {
+ assert (index < rowCount);
+ return new UnsafeDataMapRow(schema, memoryBlock, pointers[index]);
+ }
+
+ public void finishWriting() {
+ if (runningLength < allocatedSize) {
+ MemoryBlock allocate =
+ MemoryAllocatorFactory.INSATANCE.getMemoryAllocator().allocate(runningLength);
+ unsafe.copyMemory(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset(),
+ allocate.getBaseObject(), allocate.getBaseOffset(), runningLength);
+ memoryAllocator.free(memoryBlock);
+ memoryBlock = allocate;
+ }
+ // Compact pointers.
+ if (rowCount < pointers.length) {
+ int[] newPointer = new int[rowCount];
+ System.arraycopy(pointers, 0, newPointer, 0, rowCount);
+ this.pointers = newPointer;
+ }
+ }
+
+ public void freeMemory() {
+ if (!isMemoryFreed) {
+ memoryAllocator.free(memoryBlock);
+ isMemoryFreed = true;
+ }
+ }
+
+ public int getMemoryUsed() {
+ return runningLength;
+ }
+
+ public DataMapSchema[] getSchema() {
+ return schema;
+ }
+
+ public int getRowCount() {
+ return rowCount;
+ }
+
+}
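A minimal write/read lifecycle sketch for this store. Hypothetical caller code, where schema (a DataMapSchema[]) and rows are assumed inputs:

    UnsafeMemoryDMStore store = new UnsafeMemoryDMStore(schema);
    for (DataMapRow row : rows) {
      store.addIndexRowToUnsafe(row);   // grows the unsafe block on demand
    }
    store.finishWriting();              // trims the block and pointer array to fit
    for (int i = 0; i < store.getRowCount(); i++) {
      DataMapRow stored = store.getUnsafeRow(i);   // view over the unsafe memory
    }
    store.freeMemory();                 // mandatory, otherwise the unsafe block leaks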
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDMComparator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDMComparator.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDMComparator.java
new file mode 100644
index 0000000..9a50600
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDMComparator.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore.blockletindex;
+
+import java.nio.ByteBuffer;
+import java.util.Comparator;
+
+import org.apache.carbondata.core.indexstore.row.DataMapRow;
+import org.apache.carbondata.core.util.ByteUtil;
+
+/**
+ * Data map comparator
+ */
+public class BlockletDMComparator implements Comparator<DataMapRow> {
+
+ /**
+ * no dictionary columns are variable length, so their entry in the
+ * column value size array is -1
+ */
+ private static final int NO_DICTIONARY_COLUMN_VALUE = -1;
+
+ /**
+ * size of a short value in bytes
+ */
+ private static final short SHORT_SIZE_IN_BYTES = 2;
+
+ private int[] eachColumnValueSize;
+
+ /**
+ * the number of no dictionary columns in SORT_COLUMNS
+ */
+ private int numberOfNoDictSortColumns;
+
+ /**
+ * the number of columns in SORT_COLUMNS
+ */
+ private int numberOfSortColumns;
+
+ public BlockletDMComparator(int[] eachColumnValueSize, int numberOfSortColumns,
+ int numberOfNoDictSortColumns) {
+ this.eachColumnValueSize = eachColumnValueSize;
+ this.numberOfNoDictSortColumns = numberOfNoDictSortColumns;
+ this.numberOfSortColumns = numberOfSortColumns;
+ }
+
+ @Override public int compare(DataMapRow first, DataMapRow second) {
+ int dictionaryKeyOffset = 0;
+ int nonDictionaryKeyOffset = 0;
+ int compareResult = 0;
+ int processedNoDictionaryColumn = numberOfNoDictSortColumns;
+ byte[][] firstBytes = splitKey(first.getByteArray(0));
+ byte[][] secondBytes = splitKey(second.getByteArray(0));
+ byte[] firstNoDictionaryKeys = firstBytes[1];
+ ByteBuffer firstNoDictionaryKeyBuffer = ByteBuffer.wrap(firstNoDictionaryKeys);
+ byte[] secondNoDictionaryKeys = secondBytes[1];
+ ByteBuffer secondNoDictionaryKeyBuffer = ByteBuffer.wrap(secondNoDictionaryKeys);
+ int actualOffset = 0;
+ int actualOffset1 = 0;
+ int firstNoDictionaryLength = 0;
+ int secondNoDictionaryLength = 0;
+
+ for (int i = 0; i < numberOfSortColumns; i++) {
+
+ if (eachColumnValueSize[i] != NO_DICTIONARY_COLUMN_VALUE) {
+ byte[] firstDictionaryKeys = firstBytes[0];
+ byte[] secondDictionaryKeys = secondBytes[0];
+ compareResult = ByteUtil.UnsafeComparer.INSTANCE
+ .compareTo(firstDictionaryKeys, dictionaryKeyOffset, eachColumnValueSize[i],
+ secondDictionaryKeys, dictionaryKeyOffset, eachColumnValueSize[i]);
+ dictionaryKeyOffset += eachColumnValueSize[i];
+ } else {
+ if (processedNoDictionaryColumn > 1) {
+ actualOffset = firstNoDictionaryKeyBuffer.getShort(nonDictionaryKeyOffset);
+ firstNoDictionaryLength =
+ firstNoDictionaryKeyBuffer.getShort(nonDictionaryKeyOffset + SHORT_SIZE_IN_BYTES)
+ - actualOffset;
+ actualOffset1 = secondNoDictionaryKeyBuffer.getShort(nonDictionaryKeyOffset);
+ secondNoDictionaryLength =
+ secondNoDictionaryKeyBuffer.getShort(nonDictionaryKeyOffset + SHORT_SIZE_IN_BYTES)
+ - actualOffset1;
+ compareResult = ByteUtil.UnsafeComparer.INSTANCE
+ .compareTo(firstNoDictionaryKeys, actualOffset, firstNoDictionaryLength,
+ secondNoDictionaryKeys, actualOffset1, secondNoDictionaryLength);
+ nonDictionaryKeyOffset += SHORT_SIZE_IN_BYTES;
+ processedNoDictionaryColumn--;
+ } else {
+ actualOffset = firstNoDictionaryKeyBuffer.getShort(nonDictionaryKeyOffset);
+ actualOffset1 = secondNoDictionaryKeyBuffer.getShort(nonDictionaryKeyOffset);
+ firstNoDictionaryLength = firstNoDictionaryKeys.length - actualOffset;
+ secondNoDictionaryLength = secondNoDictionaryKeys.length - actualOffset1;
+ compareResult = ByteUtil.UnsafeComparer.INSTANCE
+ .compareTo(firstNoDictionaryKeys, actualOffset, firstNoDictionaryLength,
+ secondNoDictionaryKeys, actualOffset1, secondNoDictionaryLength);
+ }
+ }
+ if (compareResult != 0) {
+ return compareResult;
+ }
+ }
+
+ return 0;
+ }
+
+ /**
+ * Split the composite index key into its dictionary and no-dictionary parts.
+ * @param startKey
+ * @return
+ */
+ private byte[][] splitKey(byte[] startKey) {
+ ByteBuffer buffer = ByteBuffer.wrap(startKey);
+ buffer.rewind();
+ int dictionaryKeySize = buffer.getInt();
+ int nonDictionaryKeySize = buffer.getInt();
+ byte[] dictionaryKey = new byte[dictionaryKeySize];
+ buffer.get(dictionaryKey);
+ byte[] nonDictionaryKey = new byte[nonDictionaryKeySize];
+ buffer.get(nonDictionaryKey);
+ return new byte[][] {dictionaryKey, nonDictionaryKey};
+ }
+}
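The comparator assumes each composite index key is laid out as [int dictionary-length][int no-dictionary-length][dictionary bytes][no-dictionary bytes], which splitKey unpacks. Building such a key is the mirror image; the same layout is produced by convertToRow in BlockletDataMap further below:

    ByteBuffer buffer = ByteBuffer.allocate(8 + dictionaryKey.length + nonDictionaryKey.length);
    buffer.putInt(dictionaryKey.length);      // read back by splitKey's first getInt()
    buffer.putInt(nonDictionaryKey.length);   // read back by the second getInt()
    buffer.put(dictionaryKey);
    buffer.put(nonDictionaryKey);
    byte[] compositeKey = buffer.array();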
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
new file mode 100644
index 0000000..79aa091
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
@@ -0,0 +1,445 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore.blockletindex;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.carbondata.common.logging.LogService;
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.cache.Cacheable;
+import org.apache.carbondata.core.datastore.IndexKey;
+import org.apache.carbondata.core.datastore.block.SegmentProperties;
+import org.apache.carbondata.core.datastore.block.TableBlockInfo;
+import org.apache.carbondata.core.indexstore.Blocklet;
+import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
+import org.apache.carbondata.core.indexstore.DataMap;
+import org.apache.carbondata.core.indexstore.DataMapDistributable;
+import org.apache.carbondata.core.indexstore.DataMapWriter;
+import org.apache.carbondata.core.indexstore.UnsafeMemoryDMStore;
+import org.apache.carbondata.core.indexstore.row.DataMapRow;
+import org.apache.carbondata.core.indexstore.row.DataMapRowImpl;
+import org.apache.carbondata.core.indexstore.schema.DataMapSchema;
+import org.apache.carbondata.core.keygenerator.KeyGenException;
+import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
+import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
+import org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
+import org.apache.carbondata.core.scan.filter.FilterUtil;
+import org.apache.carbondata.core.scan.filter.executer.FilterExecuter;
+import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataFileFooterConverter;
+
+/**
+ * Datamap implementation for blocklet.
+ */
+public class BlockletDataMap implements DataMap, Cacheable {
+
+ private static final LogService LOGGER =
+ LogServiceFactory.getLogService(BlockletDataMap.class.getName());
+
+ private static int KEY_INDEX = 0;
+
+ private static int MIN_VALUES_INDEX = 1;
+
+ private static int MAX_VALUES_INDEX = 2;
+
+ private static int ROW_COUNT_INDEX = 3;
+
+ private static int FILE_PATH_INDEX = 4;
+
+ private static int PAGE_COUNT_INDEX = 5;
+
+ private static int VERSION_INDEX = 6;
+
+ private static int SCHEMA_UPDATED_TIME_INDEX = 7;
+
+ private static int BLOCK_INFO_INDEX = 8;
+
+ private UnsafeMemoryDMStore unsafeMemoryDMStore;
+
+ private SegmentProperties segmentProperties;
+
+ private int[] columnCardinality;
+
+ @Override public DataMapWriter getWriter() {
+ return null;
+ }
+
+ @Override public void init(String path) {
+ DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter();
+ try {
+ List<DataFileFooter> indexInfo = fileFooterConverter.getIndexInfo(path);
+ for (DataFileFooter fileFooter : indexInfo) {
+ List<ColumnSchema> columnInTable = fileFooter.getColumnInTable();
+ if (segmentProperties == null) {
+ columnCardinality = fileFooter.getSegmentInfo().getColumnCardinality();
+ segmentProperties = new SegmentProperties(columnInTable, columnCardinality);
+ createSchema(segmentProperties);
+ }
+ TableBlockInfo blockInfo = fileFooter.getBlockInfo().getTableBlockInfo();
+ fileFooter = CarbonUtil.readMetadatFile(blockInfo);
+
+ loadToUnsafe(fileFooter, segmentProperties, blockInfo.getFilePath());
+ }
+ if (unsafeMemoryDMStore != null) {
+ unsafeMemoryDMStore.finishWriting();
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void loadToUnsafe(DataFileFooter fileFooter, SegmentProperties segmentProperties,
+ String filePath) {
+ int[] minMaxLen = segmentProperties.getEachDimColumnValueSize();
+ List<BlockletInfo> blockletList = fileFooter.getBlockletList();
+ DataMapSchema[] schema = unsafeMemoryDMStore.getSchema();
+ for (int index = 0; index < blockletList.size(); index++) {
+ DataMapRow row = new DataMapRowImpl(schema);
+ int ordinal = 0;
+ BlockletInfo blockletInfo = blockletList.get(index);
+
+ // add start key as index key
+ row.setByteArray(blockletInfo.getBlockletIndex().getBtreeIndex().getStartKey(), ordinal++);
+
+ BlockletMinMaxIndex minMaxIndex = blockletInfo.getBlockletIndex().getMinMaxIndex();
+ row.setRow(addMinMax(minMaxLen, schema[ordinal], minMaxIndex.getMinValues()), ordinal);
+ ordinal++;
+ row.setRow(addMinMax(minMaxLen, schema[ordinal], minMaxIndex.getMaxValues()), ordinal);
+ ordinal++;
+
+ row.setInt(blockletInfo.getNumberOfRows(), ordinal++);
+
+ // add file path
+ byte[] filePathBytes = filePath.getBytes();
+ row.setByteArray(filePathBytes, ordinal++);
+
+ // add pages
+ row.setShort((short) blockletInfo.getNumberOfPages(), ordinal++);
+
+ // add version number
+ row.setShort(fileFooter.getVersionId().number(), ordinal++);
+
+ // add schema updated time
+ row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);
+
+ // add blocklet info
+ byte[] serializedData;
+ try {
+ ByteArrayOutputStream stream = new ByteArrayOutputStream();
+ DataOutput dataOutput = new DataOutputStream(stream);
+ blockletInfo.write(dataOutput);
+ serializedData = stream.toByteArray();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ row.setByteArray(serializedData, ordinal);
+ unsafeMemoryDMStore.addIndexRowToUnsafe(row);
+ }
+ }
+
+ private DataMapRow addMinMax(int[] minMaxLen, DataMapSchema dataMapSchema, byte[][] minValues) {
+ DataMapSchema[] minSchemas =
+ ((DataMapSchema.StructDataMapSchema) dataMapSchema).getChildSchemas();
+ DataMapRow minRow = new DataMapRowImpl(minSchemas);
+ int minOrdinal = 0;
+ // min value adding
+ for (int i = 0; i < minMaxLen.length; i++) {
+ minRow.setByteArray(minValues[i], minOrdinal++);
+ }
+ return minRow;
+ }
+
+ private void createSchema(SegmentProperties segmentProperties) {
+ List<DataMapSchema> indexSchemas = new ArrayList<>();
+
+ // Index key
+ indexSchemas.add(new DataMapSchema.VariableDataMapSchema(DataType.BYTE_ARRAY));
+ int[] minMaxLen = segmentProperties.getEachDimColumnValueSize();
+ // do it 2 times, one for min and one for max.
+ for (int k = 0; k < 2; k++) {
+ DataMapSchema[] mapSchemas = new DataMapSchema[minMaxLen.length];
+ for (int i = 0; i < minMaxLen.length; i++) {
+ if (minMaxLen[i] <= 0) {
+ mapSchemas[i] = new DataMapSchema.VariableDataMapSchema(DataType.BYTE_ARRAY);
+ } else {
+ mapSchemas[i] = new DataMapSchema.FixedDataMapSchema(DataType.BYTE_ARRAY, minMaxLen[i]);
+ }
+ }
+ DataMapSchema mapSchema = new DataMapSchema.StructDataMapSchema(DataType.STRUCT, mapSchemas);
+ indexSchemas.add(mapSchema);
+ }
+
+ // for number of rows.
+ indexSchemas.add(new DataMapSchema.FixedDataMapSchema(DataType.INT));
+
+ // for table block path
+ indexSchemas.add(new DataMapSchema.VariableDataMapSchema(DataType.BYTE_ARRAY));
+
+ // for number of pages.
+ indexSchemas.add(new DataMapSchema.FixedDataMapSchema(DataType.SHORT));
+
+ // for version number.
+ indexSchemas.add(new DataMapSchema.FixedDataMapSchema(DataType.SHORT));
+
+ // for schema updated time.
+ indexSchemas.add(new DataMapSchema.FixedDataMapSchema(DataType.LONG));
+
+ //for blocklet info
+ indexSchemas.add(new DataMapSchema.VariableDataMapSchema(DataType.BYTE_ARRAY));
+
+ unsafeMemoryDMStore =
+ new UnsafeMemoryDMStore(indexSchemas.toArray(new DataMapSchema[indexSchemas.size()]));
+ }
+
+ @Override public List<Blocklet> prune(FilterResolverIntf filterExp) {
+
+ // getting the start and end index key based on filter for hitting the
+ // selected block reference nodes based on filter resolver tree.
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("preparing the start and end key for finding"
+ + "start and end block as per filter resolver");
+ }
+ List<Blocklet> blocklets = new ArrayList<>();
+ Comparator<DataMapRow> comparator =
+ new BlockletDMComparator(segmentProperties.getEachDimColumnValueSize(),
+ segmentProperties.getNumberOfSortColumns(),
+ segmentProperties.getNumberOfNoDictSortColumns());
+ List<IndexKey> listOfStartEndKeys = new ArrayList<IndexKey>(2);
+ FilterUtil
+ .traverseResolverTreeAndGetStartAndEndKey(segmentProperties, filterExp, listOfStartEndKeys);
+ // reading the first value from list which has start key
+ IndexKey searchStartKey = listOfStartEndKeys.get(0);
+ // reading the last value from list which has end key
+ IndexKey searchEndKey = listOfStartEndKeys.get(1);
+ if (null == searchStartKey && null == searchEndKey) {
+ try {
+ // TODO need to handle for no dictionary dimensions
+ searchStartKey = FilterUtil.prepareDefaultStartIndexKey(segmentProperties);
+ // TODO need to handle for no dictionary dimensions
+ searchEndKey = FilterUtil.prepareDefaultEndIndexKey(segmentProperties);
+ } catch (KeyGenException e) {
+ return null;
+ }
+ }
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug(
+ "Successfully retrieved the start and end key. Dictionary Start Key: " + searchStartKey
+ .getDictionaryKeys() + ", No Dictionary Start Key: " + searchStartKey
+ .getNoDictionaryKeys() + ", Dictionary End Key: " + searchEndKey.getDictionaryKeys()
+ + ", No Dictionary End Key: " + searchEndKey.getNoDictionaryKeys());
+ }
+ if (filterExp == null) {
+ int rowCount = unsafeMemoryDMStore.getRowCount();
+ for (int i = 0; i < rowCount; i++) {
+ DataMapRow unsafeRow = unsafeMemoryDMStore.getUnsafeRow(i);
+ blocklets.add(createBlocklet(unsafeRow, i));
+ }
+ } else {
+ int startIndex = findStartIndex(convertToRow(searchStartKey), comparator);
+ int endIndex = findEndIndex(convertToRow(searchEndKey), comparator);
+ FilterExecuter filterExecuter =
+ FilterUtil.getFilterExecuterTree(filterExp, segmentProperties, null);
+ while (startIndex <= endIndex) {
+ DataMapRow unsafeRow = unsafeMemoryDMStore.getUnsafeRow(startIndex);
+ BitSet bitSet = filterExecuter.isScanRequired(getMinMaxValue(unsafeRow, MAX_VALUES_INDEX),
+ getMinMaxValue(unsafeRow, MIN_VALUES_INDEX));
+ if (!bitSet.isEmpty()) {
+ blocklets.add(createBlocklet(unsafeRow, startIndex));
+ }
+ startIndex++;
+ }
+ }
+
+ return blocklets;
+ }
+
+ private byte[][] getMinMaxValue(DataMapRow row, int index) {
+ DataMapRow minMaxRow = row.getRow(index);
+ byte[][] minMax = new byte[minMaxRow.getColumnCount()][];
+ for (int i = 0; i < minMax.length; i++) {
+ minMax[i] = minMaxRow.getByteArray(i);
+ }
+ return minMax;
+ }
+
+ private Blocklet createBlocklet(DataMapRow row, int blockletId) {
+ Blocklet blocklet =
+ new Blocklet(new String(row.getByteArray(FILE_PATH_INDEX)), blockletId + "");
+ BlockletDetailInfo detailInfo = new BlockletDetailInfo();
+ detailInfo.setRowCount(row.getInt(ROW_COUNT_INDEX));
+ detailInfo.setPagesCount(row.getShort(PAGE_COUNT_INDEX));
+ detailInfo.setVersionNumber(row.getShort(VERSION_INDEX));
+ detailInfo.setDimLens(columnCardinality);
+ detailInfo.setSchemaUpdatedTimeStamp(row.getLong(SCHEMA_UPDATED_TIME_INDEX));
+ BlockletInfo blockletInfo = new BlockletInfo();
+ try {
+ byte[] byteArray = row.getByteArray(BLOCK_INFO_INDEX);
+ ByteArrayInputStream stream = new ByteArrayInputStream(byteArray);
+ DataInputStream inputStream = new DataInputStream(stream);
+ blockletInfo.readFields(inputStream);
+ inputStream.close();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ detailInfo.setBlockletInfo(blockletInfo);
+ blocklet.setDetailInfo(detailInfo);
+ return blocklet;
+ }
+
+ /**
+ * Binary search used to get the first tentative index row based on
+ * search key
+ *
+ * @param key search key
+ * @return first tentative block
+ */
+ private int findStartIndex(DataMapRow key, Comparator<DataMapRow> comparator) {
+ int childNodeIndex;
+ int low = 0;
+ int high = unsafeMemoryDMStore.getRowCount() - 1;
+ int mid = 0;
+ int compareRes = -1;
+ //
+ while (low <= high) {
+ mid = (low + high) >>> 1;
+ // compare the entries
+ compareRes = comparator.compare(key, unsafeMemoryDMStore.getUnsafeRow(mid));
+ if (compareRes < 0) {
+ high = mid - 1;
+ } else if (compareRes > 0) {
+ low = mid + 1;
+ } else {
+ // if key is matched then get the first entry
+ int currentPos = mid;
+ while (currentPos - 1 >= 0
+ && comparator.compare(key, unsafeMemoryDMStore.getUnsafeRow(currentPos - 1)) == 0) {
+ currentPos--;
+ }
+ mid = currentPos;
+ break;
+ }
+ }
+ // if the key was not found (compare result is less than zero) and mid is
+ // greater than 0, step back to the previous block because duplicate
+ // records can be present
+ if (compareRes < 0) {
+ if (mid > 0) {
+ mid--;
+ }
+ childNodeIndex = mid;
+ } else {
+ childNodeIndex = mid;
+ }
+ // get the leaf child
+ return childNodeIndex;
+ }
+
+ /**
+ * Binary search used to get the last tentative block based on
+ * search key
+ *
+ * @param key search key
+ * @return last tentative block
+ */
+ private int findEndIndex(DataMapRow key, Comparator<DataMapRow> comparator) {
+ int childNodeIndex;
+ int low = 0;
+ int high = unsafeMemoryDMStore.getRowCount() - 1;
+ int mid = 0;
+ int compareRes = -1;
+ //
+ while (low <= high) {
+ mid = (low + high) >>> 1;
+ // compare the entries
+ compareRes = comparator.compare(key, unsafeMemoryDMStore.getUnsafeRow(mid));
+ if (compareRes < 0) {
+ high = mid - 1;
+ } else if (compareRes > 0) {
+ low = mid + 1;
+ } else {
+ int currentPos = mid;
+ // if key is matched then get the last entry
+ while (currentPos + 1 < unsafeMemoryDMStore.getRowCount()
+ && comparator.compare(key, unsafeMemoryDMStore.getUnsafeRow(currentPos + 1)) == 0) {
+ currentPos++;
+ }
+ mid = currentPos;
+ break;
+ }
+ }
+ // if the key was not found (compare result is less than zero) and mid is
+ // greater than 0, step back to the previous block because duplicate
+ // records can be present
+ if (compareRes < 0) {
+ if (mid > 0) {
+ mid--;
+ }
+ childNodeIndex = mid;
+ } else {
+ childNodeIndex = mid;
+ }
+ return childNodeIndex;
+ }
+
+ private DataMapRow convertToRow(IndexKey key) {
+ ByteBuffer buffer =
+ ByteBuffer.allocate(key.getDictionaryKeys().length + key.getNoDictionaryKeys().length + 8);
+ buffer.putInt(key.getDictionaryKeys().length);
+ buffer.putInt(key.getNoDictionaryKeys().length);
+ buffer.put(key.getDictionaryKeys());
+ buffer.put(key.getNoDictionaryKeys());
+ DataMapRowImpl dataMapRow = new DataMapRowImpl(unsafeMemoryDMStore.getSchema());
+ dataMapRow.setByteArray(buffer.array(), 0);
+ return dataMapRow;
+ }
+
+ @Override public void clear() {
+ unsafeMemoryDMStore.freeMemory();
+ unsafeMemoryDMStore = null;
+ segmentProperties = null;
+ }
+
+ @Override public long getFileTimeStamp() {
+ return 0;
+ }
+
+ @Override public int getAccessCount() {
+ return 0;
+ }
+
+ @Override public long getMemorySize() {
+ return unsafeMemoryDMStore.getMemoryUsed();
+ }
+
+ @Override public DataMapDistributable toDistributable() {
+ // TODO
+ return null;
+ }
+}
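Putting the pieces together, a hypothetical single-node usage sketch of this datamap, where indexFilePath and filterExp are assumed inputs:

    BlockletDataMap dataMap = new BlockletDataMap();
    dataMap.init(indexFilePath);    // reads the carbonindex footers into the unsafe store
    List<Blocklet> blocklets = dataMap.prune(filterExp);   // binary search plus min/max check
    // hand the blocklets to the scan; a null filterExp returns every blocklet
    dataMap.clear();                // frees the unsafe store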
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
new file mode 100644
index 0000000..2fe6643
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore.blockletindex;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.carbondata.core.cache.Cache;
+import org.apache.carbondata.core.cache.CacheProvider;
+import org.apache.carbondata.core.cache.CacheType;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.events.ChangeEvent;
+import org.apache.carbondata.core.indexstore.DataMap;
+import org.apache.carbondata.core.indexstore.DataMapDistributable;
+import org.apache.carbondata.core.indexstore.DataMapFactory;
+import org.apache.carbondata.core.indexstore.DataMapWriter;
+import org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier;
+import org.apache.carbondata.core.indexstore.schema.FilterType;
+import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
+
+/**
+ * Table map for blocklet
+ */
+public class BlockletDataMapFactory implements DataMapFactory {
+
+ private AbsoluteTableIdentifier identifier;
+
+ private Map<String, List<TableBlockIndexUniqueIdentifier>> segmentMap = new HashMap<>();
+
+ private Cache<TableBlockIndexUniqueIdentifier, DataMap> cache;
+
+ public void init(AbsoluteTableIdentifier identifier, String dataMapName) {
+ this.identifier = identifier;
+ cache = CacheProvider.getInstance()
+ .createCache(CacheType.DRIVER_BLOCKLET_DATAMAP, identifier.getStorePath());
+ }
+
+ public DataMapWriter getDataMapWriter(AbsoluteTableIdentifier identifier, String segmentId) {
+ return null;
+ }
+
+ public List<DataMap> getDataMaps(String segmentId) {
+ List<TableBlockIndexUniqueIdentifier> tableBlockIndexUniqueIdentifiers =
+ segmentMap.get(segmentId);
+ if (tableBlockIndexUniqueIdentifiers == null) {
+ tableBlockIndexUniqueIdentifiers = new ArrayList<>();
+ String path = identifier.getTablePath() + "/Fact/Part0/Segment_" + segmentId;
+ FileFactory.FileType fileType = FileFactory.getFileType(path);
+ CarbonFile carbonFile = FileFactory.getCarbonFile(path, fileType);
+ CarbonFile[] listFiles = carbonFile.listFiles(new CarbonFileFilter() {
+ @Override public boolean accept(CarbonFile file) {
+ return file.getName().endsWith(".carbonindex");
+ }
+ });
+ for (int i = 0; i < listFiles.length; i++) {
+ tableBlockIndexUniqueIdentifiers.add(
+ new TableBlockIndexUniqueIdentifier(identifier, segmentId, listFiles[i].getName()));
+ }
+ segmentMap.put(segmentId, tableBlockIndexUniqueIdentifiers);
+ }
+
+ try {
+ return cache.getAll(tableBlockIndexUniqueIdentifiers);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override public boolean isFiltersSupported(FilterType filterType) {
+ return true;
+ }
+
+ public void clear(String segmentId) {
+ List<TableBlockIndexUniqueIdentifier> blockIndexes = segmentMap.remove(segmentId);
+ if (blockIndexes != null) {
+ for (TableBlockIndexUniqueIdentifier blockIndex : blockIndexes) {
+ DataMap dataMap = cache.getIfPresent(blockIndex);
+ if (dataMap != null) {
+ dataMap.clear();
+ cache.invalidate(blockIndex);
+ }
+ }
+ }
+ }
+
+ @Override public void clear() {
+ for (String segmentId: segmentMap.keySet()) {
+ clear(segmentId);
+ }
+ }
+
+ @Override public DataMap getDataMap(DataMapDistributable distributable) {
+ return null;
+ }
+
+ @Override public void fireEvent(ChangeEvent event) {
+
+ }
+}
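A sketch of the factory's intended call sequence. Hypothetical caller code in which identifier and segmentId are assumed inputs and the datamap name is an arbitrary placeholder:

    BlockletDataMapFactory factory = new BlockletDataMapFactory();
    factory.init(identifier, "blocklet");   // "blocklet" is an assumed name
    List<DataMap> dataMaps = factory.getDataMaps(segmentId);   // one per .carbonindex, cached
    // prune each datamap; after a new load or compaction of the segment:
    factory.clear(segmentId);   // evicts the segment's entries from the driver cache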
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java
new file mode 100644
index 0000000..5509c75
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore.blockletindex;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.carbondata.core.cache.update.BlockletLevelDeleteDeltaDataCache;
+import org.apache.carbondata.core.datastore.DataRefNode;
+import org.apache.carbondata.core.datastore.FileHolder;
+import org.apache.carbondata.core.datastore.block.TableBlockInfo;
+import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
+import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;
+import org.apache.carbondata.core.datastore.chunk.reader.CarbonDataReaderFactory;
+import org.apache.carbondata.core.datastore.chunk.reader.DimensionColumnChunkReader;
+import org.apache.carbondata.core.datastore.chunk.reader.MeasureColumnChunkReader;
+import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
+
+/**
+ * wrapper for blocklet data map data
+ */
+public class BlockletDataRefNodeWrapper implements DataRefNode {
+
+ private List<TableBlockInfo> blockInfos;
+
+ private int index;
+
+ private int[] dimensionLens;
+
+ private BlockletLevelDeleteDeltaDataCache deleteDeltaDataCache;
+
+ public BlockletDataRefNodeWrapper(List<TableBlockInfo> blockInfos, int index,
+ int[] dimensionLens) {
+ this.blockInfos = blockInfos;
+ this.index = index;
+ this.dimensionLens = dimensionLens;
+ }
+
+ @Override public DataRefNode getNextDataRefNode() {
+ if (index + 1 < blockInfos.size()) {
+ return new BlockletDataRefNodeWrapper(blockInfos, index + 1, dimensionLens);
+ }
+ return null;
+ }
+
+ @Override public int nodeSize() {
+ return blockInfos.get(index).getDetailInfo().getRowCount();
+ }
+
+ @Override public long nodeNumber() {
+ return index;
+ }
+
+ @Override public byte[][] getColumnsMaxValue() {
+ return null;
+ }
+
+ @Override public byte[][] getColumnsMinValue() {
+ return null;
+ }
+
+ @Override
+ public DimensionRawColumnChunk[] getDimensionChunks(FileHolder fileReader, int[][] blockIndexes)
+ throws IOException {
+ DimensionColumnChunkReader dimensionChunksReader = getDimensionColumnChunkReader();
+ return dimensionChunksReader.readRawDimensionChunks(fileReader, blockIndexes);
+ }
+
+ @Override
+ public DimensionRawColumnChunk getDimensionChunk(FileHolder fileReader, int blockIndexes)
+ throws IOException {
+ DimensionColumnChunkReader dimensionChunksReader = getDimensionColumnChunkReader();
+ return dimensionChunksReader.readRawDimensionChunk(fileReader, blockIndexes);
+ }
+
+ @Override
+ public MeasureRawColumnChunk[] getMeasureChunks(FileHolder fileReader, int[][] blockIndexes)
+ throws IOException {
+ MeasureColumnChunkReader measureColumnChunkReader = getMeasureColumnChunkReader();
+ return measureColumnChunkReader.readRawMeasureChunks(fileReader, blockIndexes);
+ }
+
+ @Override public MeasureRawColumnChunk getMeasureChunk(FileHolder fileReader, int blockIndex)
+ throws IOException {
+ MeasureColumnChunkReader measureColumnChunkReader = getMeasureColumnChunkReader();
+ return measureColumnChunkReader.readRawMeasureChunk(fileReader, blockIndex);
+ }
+
+ private DimensionColumnChunkReader getDimensionColumnChunkReader() throws IOException {
+ ColumnarFormatVersion version =
+ ColumnarFormatVersion.valueOf(blockInfos.get(index).getDetailInfo().getVersionNumber());
+ DimensionColumnChunkReader dimensionColumnChunkReader = CarbonDataReaderFactory.getInstance()
+ .getDimensionColumnChunkReader(version,
+ blockInfos.get(index).getDetailInfo().getBlockletInfo(), dimensionLens,
+ blockInfos.get(index).getFilePath());
+ return dimensionColumnChunkReader;
+ }
+
+ private MeasureColumnChunkReader getMeasureColumnChunkReader() throws IOException {
+ ColumnarFormatVersion version =
+ ColumnarFormatVersion.valueOf(blockInfos.get(index).getDetailInfo().getVersionNumber());
+ return CarbonDataReaderFactory.getInstance().getMeasureColumnChunkReader(version,
+ blockInfos.get(index).getDetailInfo().getBlockletInfo(),
+ blockInfos.get(index).getFilePath());
+ }
+
+ @Override
+ public void setDeleteDeltaDataCache(BlockletLevelDeleteDeltaDataCache deleteDeltaDataCache) {
+ this.deleteDeltaDataCache = deleteDeltaDataCache;
+ }
+
+ @Override public BlockletLevelDeleteDeltaDataCache getDeleteDeltaDataCache() {
+ return deleteDeltaDataCache;
+ }
+
+ @Override public int numberOfPages() {
+ return blockInfos.get(index).getDetailInfo().getPagesCount();
+ }
+
+ public int numberOfNodes() {
+ return blockInfos.size();
+ }
+}
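A traversal sketch over the wrapper's linked-list view of blocklets. Hypothetical caller code; blockInfos and dimensionLens are assumed inputs, and it relies on getNextDataRefNode returning the next wrapper as fixed above:

    DataRefNode node = new BlockletDataRefNodeWrapper(blockInfos, 0, dimensionLens);
    int totalRows = 0;
    while (node != null) {
      totalRows += node.nodeSize();       // row count of the current blocklet
      node = node.getNextDataRefNode();   // null after the last blocklet
    }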
http://git-wip-us.apache.org/repos/asf/carbondata/blob/b385d14b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
new file mode 100644
index 0000000..b8cffc6
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore.blockletindex;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.carbondata.core.datastore.block.AbstractIndex;
+import org.apache.carbondata.core.datastore.block.SegmentProperties;
+import org.apache.carbondata.core.datastore.block.TableBlockInfo;
+import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
+import org.apache.carbondata.core.util.CarbonUtil;
+
+/**
+ * Wrapper of abstract index
+ * TODO it could be removed after refactor
+ */
+public class IndexWrapper extends AbstractIndex {
+
+ public IndexWrapper(List<TableBlockInfo> blockInfos) {
+ DataFileFooter fileFooter = null;
+ try {
+ fileFooter = CarbonUtil.readMetadatFile(blockInfos.get(0));
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ segmentProperties = new SegmentProperties(fileFooter.getColumnInTable(),
+ fileFooter.getSegmentInfo().getColumnCardinality());
+ dataRefNode = new BlockletDataRefNodeWrapper(blockInfos, 0,
+ segmentProperties.getDimensionColumnsValueSize());
+ }
+
+ @Override public void buildIndex(List<DataFileFooter> footerList) {
+ }
+}
[12/50] [abbrv] carbondata git commit: [CARBONDATA-1215][BUGFIX] Fix
unsafe column page for decimal query This closes #1121
Posted by ja...@apache.org.
[CARBONDATA-1215][BUGFIX] Fix unsafe column page for decimal query This closes #1121
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3e726e0a
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3e726e0a
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3e726e0a
Branch: refs/heads/datamap
Commit: 3e726e0aca440234f02b5f62a05390abe10f5a1d
Parents: ad80006 fdb672a
Author: QiangCai <qi...@qq.com>
Authored: Sat Jul 1 13:10:31 2017 +0800
Committer: QiangCai <qi...@qq.com>
Committed: Sat Jul 1 13:10:31 2017 +0800
----------------------------------------------------------------------
.../page/UnsafeVarLengthColumnPage.java | 35 ++++++++++++++++----
.../datastore/page/VarLengthColumnPageBase.java | 3 +-
.../resources/big_decimal_without_header.csv | 5 +++
.../TestLoadDataWithHiveSyntaxUnsafe.scala | 25 +++++++++++++-
4 files changed, 59 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
[47/50] [abbrv] carbondata git commit: Update
supported-data-types-in-carbondata.md
Posted by ja...@apache.org.
Update supported-data-types-in-carbondata.md
This closes #1165
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/9e4da2a6
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/9e4da2a6
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/9e4da2a6
Branch: refs/heads/datamap
Commit: 9e4da2a6caeef6acd637e49a29c70d2eedd4a504
Parents: 1a35cfb
Author: chenerlu <ch...@huawei.com>
Authored: Wed Jul 12 18:50:02 2017 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Wed Jul 12 22:04:50 2017 +0800
----------------------------------------------------------------------
docs/supported-data-types-in-carbondata.md | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/9e4da2a6/docs/supported-data-types-in-carbondata.md
----------------------------------------------------------------------
diff --git a/docs/supported-data-types-in-carbondata.md b/docs/supported-data-types-in-carbondata.md
index 8f271e3..561248c 100644
--- a/docs/supported-data-types-in-carbondata.md
+++ b/docs/supported-data-types-in-carbondata.md
@@ -35,6 +35,7 @@
* String Types
* STRING
* CHAR
+ * VARCHAR
* Complex Types
* arrays: ARRAY``<data_type>``
[26/50] [abbrv] carbondata git commit: [CARBONDATA-1241] Single sort
to be blocked for global sort. This closes #1109
Posted by ja...@apache.org.
[CARBONDATA-1241] Single sort to be blocked for global sort. This closes #1109
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1bd7b3da
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1bd7b3da
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1bd7b3da
Branch: refs/heads/datamap
Commit: 1bd7b3dafef440f13a187cd7f43a06eb05efff5d
Parents: b699ee6 a4083bf
Author: Venkata Ramana G <ra...@huawei.com>
Authored: Wed Jul 5 09:12:59 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Wed Jul 5 09:12:59 2017 +0530
----------------------------------------------------------------------
.../execution/command/carbonTableSchema.scala | 1 +
.../execution/command/carbonTableSchema.scala | 221 +++++++++++--------
.../store/CarbonFactDataHandlerColumnar.java | 8 +
3 files changed, 137 insertions(+), 93 deletions(-)
----------------------------------------------------------------------
[41/50] [abbrv] carbondata git commit: [CARBONDATA-1282] Choose
BatchedDatasource scan only if schema fits codegen
Posted by ja...@apache.org.
[CARBONDATA-1282] Choose BatchedDatasource scan only if schema fits codegen
This closes #1148
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/619f1f95
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/619f1f95
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/619f1f95
Branch: refs/heads/datamap
Commit: 619f1f95429c71ec556b18104d3aff536ad2df89
Parents: c6bc1f0
Author: ashok.blend <as...@gmail.com>
Authored: Sat Jul 8 16:27:41 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Mon Jul 10 19:28:14 2017 +0530
----------------------------------------------------------------------
.../apache/spark/sql/execution/CarbonLateDecodeStrategy.scala | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/619f1f95/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CarbonLateDecodeStrategy.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CarbonLateDecodeStrategy.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CarbonLateDecodeStrategy.scala
index 4605914..a206bef 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CarbonLateDecodeStrategy.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CarbonLateDecodeStrategy.scala
@@ -524,7 +524,9 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy {
CarbonCommonConstants.ENABLE_VECTOR_READER_DEFAULT)
}
}
- sqlContext.conf.wholeStageEnabled && vectorizedReader.toBoolean &&
+ val supportCodegen =
+ sqlContext.conf.wholeStageEnabled && sqlContext.conf.wholeStageMaxNumFields >= cols.size
+ supportCodegen && vectorizedReader.toBoolean &&
cols.forall(_.dataType.isInstanceOf[AtomicType])
}
}
[13/50] [abbrv] carbondata git commit: [CARBONDATA-1248] change
LazyColumnPage parent class
Posted by ja...@apache.org.
[CARBONDATA-1248] change LazyColumnPage parent class
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/53b92e5f
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/53b92e5f
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/53b92e5f
Branch: refs/heads/datamap
Commit: 53b92e5f8fb527529a6b075f8c71d3e389b2cee5
Parents: 3e726e0
Author: jackylk <ja...@huawei.com>
Authored: Thu Jun 29 18:20:56 2017 +0800
Committer: QiangCai <qi...@qq.com>
Committed: Sat Jul 1 19:02:27 2017 +0800
----------------------------------------------------------------------
.../core/datastore/page/LazyColumnPage.java | 164 ++++++++++++++++++-
1 file changed, 163 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/53b92e5f/core/src/main/java/org/apache/carbondata/core/datastore/page/LazyColumnPage.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/LazyColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/LazyColumnPage.java
index f90aa6c..6ec2e07 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/LazyColumnPage.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/LazyColumnPage.java
@@ -17,11 +17,13 @@
package org.apache.carbondata.core.datastore.page;
+import java.math.BigDecimal;
+
/**
* This is a decorator of column page, it performs transformation lazily (when caller calls getXXX
* method to get the value from the page)
*/
-public class LazyColumnPage extends SafeFixLengthColumnPage {
+public class LazyColumnPage extends ColumnPage {
// decorated column page
private ColumnPage columnPage;
@@ -85,7 +87,167 @@ public class LazyColumnPage extends SafeFixLengthColumnPage {
}
@Override
+ public float getFloat(int rowId) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public BigDecimal getDecimal(int rowId) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public byte[] getBytePage() {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public short[] getShortPage() {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public byte[] getShortIntPage() {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public int[] getIntPage() {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public long[] getLongPage() {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public float[] getFloatPage() {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public double[] getDoublePage() {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public byte[][] getByteArrayPage() {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public byte[] getFlattenedBytePage() {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void encode(PrimitiveCodec codec) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void setBytePage(byte[] byteData) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void setShortPage(short[] shortData) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void setShortIntPage(byte[] shortIntData) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void setIntPage(int[] intData) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void setLongPage(long[] longData) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void setFloatPage(float[] floatData) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void setDoublePage(double[] doubleData) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void setByteArrayPage(byte[][] byteArray) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
public void freeMemory() {
columnPage.freeMemory();
}
+
+ @Override
+ public void putByte(int rowId, byte value) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void putShort(int rowId, short value) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void putInt(int rowId, int value) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void putLong(int rowId, long value) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void putDouble(int rowId, double value) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void putBytes(int rowId, byte[] bytes) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void putShortInt(int rowId, int value) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public void putBytes(int rowId, byte[] bytes, int offset, int length) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public byte getByte(int rowId) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public short getShort(int rowId) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public int getShortInt(int rowId) {
+ throw new UnsupportedOperationException("internal error");
+ }
+
+ @Override
+ public int getInt(int rowId) {
+ throw new UnsupportedOperationException("internal error");
+ }
}
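The rewritten LazyColumnPage is a fail-fast decorator: it supports only the lazily transformed read path and throws UnsupportedOperationException for every other accessor. A condensed, self-contained illustration of the same pattern, using hypothetical minimal types rather than the CarbonData classes:

    interface Page {
      long getLong(int rowId);
      double getDouble(int rowId);
    }

    final class LazyDecodingPage implements Page {
      private final Page decorated;
      private final double factor;   // the transformation applied lazily on read

      LazyDecodingPage(Page decorated, double factor) {
        this.decorated = decorated;
        this.factor = factor;
      }

      @Override public long getLong(int rowId) {
        // the decorator exposes only the decoded view; raw access is an error
        throw new UnsupportedOperationException("internal error");
      }

      @Override public double getDouble(int rowId) {
        return decorated.getLong(rowId) * factor;   // decode on access
      }
    }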
[19/50] [abbrv] carbondata git commit: [CARBONDATA-982] Fixed Bug For
NotIn Clause In Presto
Posted by ja...@apache.org.
[CARBONDATA-982] Fixed Bug For NotIn Clause In Presto
This closes #1062
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0d469761
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0d469761
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0d469761
Branch: refs/heads/datamap
Commit: 0d46976105239dd9ab94b83c3f0ca287b34fee8d
Parents: 27d520c
Author: Geetika gupta <ge...@knoldus.in>
Authored: Fri Jun 16 13:07:52 2017 +0530
Committer: chenliang613 <ch...@apache.org>
Committed: Mon Jul 3 17:43:56 2017 +0800
----------------------------------------------------------------------
.../carbondata/presto/CarbondataRecordSet.java | 5 +-
.../presto/CarbondataRecordSetProvider.java | 42 ++++-----
.../presto/CarbondataSplitManager.java | 96 ++++++++++++--------
3 files changed, 84 insertions(+), 59 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/0d469761/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataRecordSet.java
----------------------------------------------------------------------
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataRecordSet.java b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataRecordSet.java
index a28342e..d75cbfb 100755
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataRecordSet.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataRecordSet.java
@@ -21,6 +21,7 @@ import com.facebook.presto.spi.*;
import com.facebook.presto.spi.predicate.TupleDomain;
import com.facebook.presto.spi.type.Type;
import org.apache.carbondata.common.CarbonIterator;
+import org.apache.carbondata.core.datastore.block.BlockletInfos;
import org.apache.carbondata.core.datastore.block.TableBlockInfo;
import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
@@ -83,9 +84,9 @@ public class CarbondataRecordSet implements RecordSet {
tableBlockInfoList.add(new TableBlockInfo(split.getLocalInputSplit().getPath().toString(),
split.getLocalInputSplit().getStart(), split.getLocalInputSplit().getSegmentId(),
split.getLocalInputSplit().getLocations().toArray(new String[0]),
- split.getLocalInputSplit().getLength(),
+ split.getLocalInputSplit().getLength(), new BlockletInfos(),
//blockletInfos,
- ColumnarFormatVersion.valueOf(split.getLocalInputSplit().getVersion()), null));
+ ColumnarFormatVersion.valueOf(split.getLocalInputSplit().getVersion()), null));
queryModel.setTableBlockInfos(tableBlockInfoList);
queryExecutor = QueryExecutorFactory.getQueryExecutor(queryModel);
http://git-wip-us.apache.org/repos/asf/carbondata/blob/0d469761/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataRecordSetProvider.java
----------------------------------------------------------------------
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataRecordSetProvider.java b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataRecordSetProvider.java
index a9652cc..71649f3 100755
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataRecordSetProvider.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataRecordSetProvider.java
@@ -142,13 +142,13 @@ public class CarbondataRecordSetProvider implements ConnectorRecordSetProvider {
}
List<Object> singleValues = new ArrayList<>();
- List<Expression> rangeFilter = new ArrayList<>();
+ List<Expression> disjuncts = new ArrayList<>();
for (Range range : domain.getValues().getRanges().getOrderedRanges()) {
checkState(!range.isAll()); // Already checked
if (range.isSingleValue()) {
singleValues.add(range.getLow().getValue());
} else {
- List<String> rangeConjuncts = new ArrayList<>();
+ List<Expression> rangeConjuncts = new ArrayList<>();
if (!range.getLow().isLowerUnbounded()) {
Object value = ConvertDataByType(range.getLow().getValue(), type);
switch (range.getLow().getBound()) {
@@ -157,15 +157,15 @@ public class CarbondataRecordSetProvider implements ConnectorRecordSetProvider {
//todo not now
} else {
GreaterThanExpression greater = new GreaterThanExpression(colExpression,
- new LiteralExpression(value, coltype));
- rangeFilter.add(greater);
+ new LiteralExpression(value, coltype));
+ rangeConjuncts.add(greater);
}
break;
case EXACTLY:
GreaterThanEqualToExpression greater =
- new GreaterThanEqualToExpression(colExpression,
- new LiteralExpression(value, coltype));
- rangeFilter.add(greater);
+ new GreaterThanEqualToExpression(colExpression,
+ new LiteralExpression(value, coltype));
+ rangeConjuncts.add(greater);
break;
case BELOW:
throw new IllegalArgumentException("Low marker should never use BELOW bound");
@@ -180,21 +180,21 @@ public class CarbondataRecordSetProvider implements ConnectorRecordSetProvider {
throw new IllegalArgumentException("High marker should never use ABOVE bound");
case EXACTLY:
LessThanEqualToExpression less = new LessThanEqualToExpression(colExpression,
- new LiteralExpression(value, coltype));
- rangeFilter.add(less);
+ new LiteralExpression(value, coltype));
+ rangeConjuncts.add(less);
break;
case BELOW:
LessThanExpression less2 =
- new LessThanExpression(colExpression, new LiteralExpression(value, coltype));
- rangeFilter.add(less2);
+ new LessThanExpression(colExpression, new LiteralExpression(value, coltype));
+ rangeConjuncts.add(less2);
break;
default:
throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
}
}
+ disjuncts.addAll(rangeConjuncts);
}
}
-
if (singleValues.size() == 1) {
Expression ex = null;
if (coltype.equals(DataType.STRING)) {
@@ -215,25 +215,25 @@ public class CarbondataRecordSetProvider implements ConnectorRecordSetProvider {
candidates = new ListExpression(exs);
if (candidates != null) filters.add(new InExpression(colExpression, candidates));
- } else if (rangeFilter.size() > 0) {
- if (rangeFilter.size() > 1) {
- Expression finalFilters = new OrExpression(rangeFilter.get(0), rangeFilter.get(1));
- if (rangeFilter.size() > 2) {
- for (int i = 2; i < rangeFilter.size(); i++) {
- filters.add(new AndExpression(finalFilters, rangeFilter.get(i)));
+ } else if (disjuncts.size() > 0) {
+ if (disjuncts.size() > 1) {
+ Expression finalFilters = new OrExpression(disjuncts.get(0), disjuncts.get(1));
+ if (disjuncts.size() > 2) {
+ for (int i = 2; i < disjuncts.size(); i++) {
+ filters.add(new AndExpression(finalFilters, disjuncts.get(i)));
}
}
- } else if (rangeFilter.size() == 1) filters.add(rangeFilter.get(0));
+ } else if (disjuncts.size() == 1) filters.add(disjuncts.get(0));
}
}
Expression finalFilters;
List<Expression> tmp = filters.build();
if (tmp.size() > 1) {
- finalFilters = new AndExpression(tmp.get(0), tmp.get(1));
+ finalFilters = new OrExpression(tmp.get(0), tmp.get(1));
if (tmp.size() > 2) {
for (int i = 2; i < tmp.size(); i++) {
- finalFilters = new AndExpression(finalFilters, tmp.get(i));
+ finalFilters = new OrExpression(finalFilters, tmp.get(i));
}
}
} else if (tmp.size() == 1) finalFilters = tmp.get(0);
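Editor's note: a minimal sketch of the folding pattern this change is after, in this file and in CarbondataSplitManager below. It is not the committed code; the class and method names are illustrative and only carbondata-core's expression classes are assumed. Within one range the bound filters are conjuncts (AND), while the ranges of one column domain are disjuncts (OR), so each list is reduced into a left-deep tree.

import java.util.List;

import org.apache.carbondata.core.scan.expression.Expression;
import org.apache.carbondata.core.scan.expression.logical.AndExpression;
import org.apache.carbondata.core.scan.expression.logical.OrExpression;

public final class ExpressionFolding {

  // Fold a list of disjuncts into one left-deep OR tree; returns null for an empty list.
  static Expression foldWithOr(List<Expression> disjuncts) {
    Expression result = null;
    for (Expression e : disjuncts) {
      result = (result == null) ? e : new OrExpression(result, e);
    }
    return result;
  }

  // Fold a list of conjuncts into one left-deep AND tree; returns null for an empty list.
  static Expression foldWithAnd(List<Expression> conjuncts) {
    Expression result = null;
    for (Expression e : conjuncts) {
      result = (result == null) ? e : new AndExpression(result, e);
    }
    return result;
  }
}

Using a single accumulator keeps every intermediate node in one expression tree, rather than emitting partial combinations as separate filters along the way.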
http://git-wip-us.apache.org/repos/asf/carbondata/blob/0d469761/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataSplitManager.java
----------------------------------------------------------------------
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataSplitManager.java b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataSplitManager.java
index e39ee58..0ce0600 100755
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataSplitManager.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataSplitManager.java
@@ -17,38 +17,59 @@
package org.apache.carbondata.presto;
+import javax.inject.Inject;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
+import org.apache.carbondata.core.scan.expression.ColumnExpression;
+import org.apache.carbondata.core.scan.expression.Expression;
+import org.apache.carbondata.core.scan.expression.LiteralExpression;
+import org.apache.carbondata.core.scan.expression.conditional.EqualToExpression;
+import org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression;
+import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression;
+import org.apache.carbondata.core.scan.expression.conditional.InExpression;
+import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression;
+import org.apache.carbondata.core.scan.expression.conditional.LessThanExpression;
+import org.apache.carbondata.core.scan.expression.conditional.ListExpression;
+import org.apache.carbondata.core.scan.expression.logical.AndExpression;
+import org.apache.carbondata.core.scan.expression.logical.OrExpression;
import org.apache.carbondata.presto.impl.CarbonLocalInputSplit;
import org.apache.carbondata.presto.impl.CarbonTableCacheModel;
import org.apache.carbondata.presto.impl.CarbonTableReader;
-import com.facebook.presto.spi.*;
+
+import com.facebook.presto.spi.ColumnHandle;
+import com.facebook.presto.spi.ConnectorSession;
+import com.facebook.presto.spi.ConnectorSplit;
+import com.facebook.presto.spi.ConnectorSplitSource;
+import com.facebook.presto.spi.ConnectorTableLayoutHandle;
+import com.facebook.presto.spi.FixedSplitSource;
+import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.connector.ConnectorSplitManager;
import com.facebook.presto.spi.connector.ConnectorTransactionHandle;
import com.facebook.presto.spi.predicate.Domain;
import com.facebook.presto.spi.predicate.Range;
import com.facebook.presto.spi.predicate.TupleDomain;
-import com.facebook.presto.spi.type.*;
+import com.facebook.presto.spi.type.BigintType;
+import com.facebook.presto.spi.type.BooleanType;
+import com.facebook.presto.spi.type.DateType;
+import com.facebook.presto.spi.type.DecimalType;
+import com.facebook.presto.spi.type.DoubleType;
+import com.facebook.presto.spi.type.IntegerType;
+import com.facebook.presto.spi.type.SmallintType;
+import com.facebook.presto.spi.type.TimestampType;
+import com.facebook.presto.spi.type.Type;
+import com.facebook.presto.spi.type.VarcharType;
import com.google.common.collect.ImmutableList;
import io.airlift.slice.Slice;
-import org.apache.carbondata.core.metadata.datatype.DataType;
-import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
-import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
-import org.apache.carbondata.core.scan.expression.ColumnExpression;
-import org.apache.carbondata.core.scan.expression.Expression;
-import org.apache.carbondata.core.scan.expression.LiteralExpression;
-import org.apache.carbondata.core.scan.expression.conditional.*;
-import org.apache.carbondata.core.scan.expression.logical.AndExpression;
-import org.apache.carbondata.core.scan.expression.logical.OrExpression;
-
-import javax.inject.Inject;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Optional;
-import java.util.stream.Collectors;
-import static org.apache.carbondata.presto.Types.checkType;
import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkState;
import static java.util.Objects.requireNonNull;
+import static org.apache.carbondata.presto.Types.checkType;
/**
* Build Carbontable splits
@@ -152,13 +173,14 @@ public class CarbondataSplitManager implements ConnectorSplitManager {
}
List<Object> singleValues = new ArrayList<>();
- List<Expression> rangeFilter = new ArrayList<>();
+
+ List<Expression> disjuncts = new ArrayList<>();
+
for (Range range : domain.getValues().getRanges().getOrderedRanges()) {
- checkState(!range.isAll()); // Already checked
if (range.isSingleValue()) {
singleValues.add(range.getLow().getValue());
} else {
- List<String> rangeConjuncts = new ArrayList<>();
+ List<Expression> rangeConjuncts = new ArrayList<>();
if (!range.getLow().isLowerUnbounded()) {
Object value = ConvertDataByType(range.getLow().getValue(), type);
switch (range.getLow().getBound()) {
@@ -168,14 +190,14 @@ public class CarbondataSplitManager implements ConnectorSplitManager {
} else {
GreaterThanExpression greater = new GreaterThanExpression(colExpression,
new LiteralExpression(value, coltype));
- rangeFilter.add(greater);
+ rangeConjuncts.add(greater);
}
break;
case EXACTLY:
GreaterThanEqualToExpression greater =
new GreaterThanEqualToExpression(colExpression,
new LiteralExpression(value, coltype));
- rangeFilter.add(greater);
+ rangeConjuncts.add(greater);
break;
case BELOW:
throw new IllegalArgumentException("Low marker should never use BELOW bound");
@@ -191,17 +213,18 @@ public class CarbondataSplitManager implements ConnectorSplitManager {
case EXACTLY:
LessThanEqualToExpression less = new LessThanEqualToExpression(colExpression,
new LiteralExpression(value, coltype));
- rangeFilter.add(less);
+ rangeConjuncts.add(less);
break;
case BELOW:
LessThanExpression less2 =
new LessThanExpression(colExpression, new LiteralExpression(value, coltype));
- rangeFilter.add(less2);
+ rangeConjuncts.add(less2);
break;
default:
throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
}
}
+ disjuncts.addAll(rangeConjuncts);
}
}
@@ -221,26 +244,26 @@ public class CarbondataSplitManager implements ConnectorSplitManager {
candidates = new ListExpression(exs);
if (candidates != null) filters.add(new InExpression(colExpression, candidates));
- } else if (rangeFilter.size() > 0) {
- if (rangeFilter.size() > 1) {
- Expression finalFilters = new OrExpression(rangeFilter.get(0), rangeFilter.get(1));
- if (rangeFilter.size() > 2) {
- for (int i = 2; i < rangeFilter.size(); i++) {
- filters.add(new AndExpression(finalFilters, rangeFilter.get(i)));
+ } else if (disjuncts.size() > 0) {
+ if (disjuncts.size() > 1) {
+ Expression finalFilters = new OrExpression(disjuncts.get(0), disjuncts.get(1));
+ if (disjuncts.size() > 2) {
+ for (int i = 2; i < disjuncts.size(); i++) {
+ filters.add(new AndExpression(finalFilters, disjuncts.get(i)));
}
}
- } else if (rangeFilter.size() == 1)//only have one value
- filters.add(rangeFilter.get(0));
+ } else if (disjuncts.size() == 1)//only have one value
+ filters.add(disjuncts.get(0));
}
}
Expression finalFilters;
List<Expression> tmp = filters.build();
if (tmp.size() > 1) {
- finalFilters = new AndExpression(tmp.get(0), tmp.get(1));
+ finalFilters = new OrExpression(tmp.get(0), tmp.get(1));
if (tmp.size() > 2) {
for (int i = 2; i < tmp.size(); i++) {
- finalFilters = new AndExpression(finalFilters, tmp.get(i));
+ finalFilters = new OrExpression(finalFilters, tmp.get(i));
}
}
} else if (tmp.size() == 1) finalFilters = tmp.get(0);
@@ -252,6 +275,7 @@ public class CarbondataSplitManager implements ConnectorSplitManager {
/**
* Convert presto spi Type into Carbondata Type
+ *
* @param colType
* @return
*/
[28/50] [abbrv] carbondata git commit: performance issue resolved
Posted by ja...@apache.org.
performance issue resolved
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1278c41b
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1278c41b
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1278c41b
Branch: refs/heads/datamap
Commit: 1278c41bb1692a3e542194626346c3324a5cfb4e
Parents: 427b88b
Author: sahakushal <ca...@gmail.com>
Authored: Wed Jun 28 16:29:01 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Wed Jul 5 10:38:56 2017 +0530
----------------------------------------------------------------------
.../apache/carbondata/spark/load/CarbonLoaderUtil.java | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1278c41b/integration/spark-common/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java b/integration/spark-common/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java
index 54e12f3..5b603aa 100644
--- a/integration/spark-common/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java
+++ b/integration/spark-common/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java
@@ -742,14 +742,20 @@ public final class CarbonLoaderUtil {
if (null == nodeAndBlockMapping.get(node)) {
list = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
list.add(nbr.getBlock());
- Collections.sort(list);
nodeAndBlockMapping.put(node, list);
} else {
list = nodeAndBlockMapping.get(node);
list.add(nbr.getBlock());
- Collections.sort(list);
}
}
+ /* To fix the performance issue, the Collections.sort() call on every insertion was removed;
+ each node's block list is now sorted exactly once here. Iterating over entrySet() gives a
+ view of the HashMap, so the map is not queried a second time for each key. */
+ Iterator<Map.Entry<String, List<Distributable>>> iterator =
+ nodeAndBlockMapping.entrySet().iterator();
+ while (iterator.hasNext()) {
+ Collections.sort(iterator.next().getValue());
+ }
}
/**
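Editor's note: a hedged illustration of the pattern above (illustrative class and method names, not the committed method; assumes Java 8 for computeIfAbsent): group all blocks per node first, then sort each node's list exactly once.

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public final class GroupThenSort {

  // nodeBlockPairs: each element is {nodeName, blockName}
  static Map<String, List<String>> groupAndSort(List<String[]> nodeBlockPairs) {
    Map<String, List<String>> nodeToBlocks = new HashMap<>();
    for (String[] pair : nodeBlockPairs) {
      // collect blocks per node without sorting yet
      nodeToBlocks.computeIfAbsent(pair[0], k -> new ArrayList<>()).add(pair[1]);
    }
    // One sort per node after grouping; sorting inside the insertion loop would
    // re-sort a growing list on every add, roughly O(n^2 log n) per node.
    for (Map.Entry<String, List<String>> entry : nodeToBlocks.entrySet()) {
      Collections.sort(entry.getValue());
    }
    return nodeToBlocks;
  }
}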
[31/50] [abbrv] carbondata git commit: [CARBONDATA-1246] fix null
pointer exception by changing null to empty array. This closes #1113
Posted by ja...@apache.org.
[CARBONDATA-1246] fix null pointer exception by changing null to empty array. This closes #1113
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d9c3b483
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d9c3b483
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d9c3b483
Branch: refs/heads/datamap
Commit: d9c3b4837b81c02056a10446da18db0a1b427852
Parents: 49c64f7 659036f
Author: Venkata Ramana G <ra...@huawei.com>
Authored: Wed Jul 5 11:23:46 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Wed Jul 5 11:23:46 2017 +0530
----------------------------------------------------------------------
.../carbondata/core/datastore/filesystem/AlluxioCarbonFile.java | 2 +-
.../carbondata/core/datastore/filesystem/HDFSCarbonFile.java | 2 +-
.../carbondata/core/datastore/filesystem/LocalCarbonFile.java | 2 +-
.../carbondata/core/datastore/filesystem/ViewFSCarbonFile.java | 2 +-
.../core/datastore/filesystem/AlluxioCarbonFileTest.java | 3 ++-
.../carbondata/core/datastore/filesystem/HDFSCarbonFileTest.java | 3 ++-
.../carbondata/core/datastore/filesystem/LocalCarbonFileTest.java | 3 ++-
.../core/datastore/filesystem/ViewFsCarbonFileTest.java | 3 ++-
8 files changed, 12 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
[23/50] [abbrv] carbondata git commit: [CARBONDATA-980] Fix for Is
Not Null in presto
Posted by ja...@apache.org.
[CARBONDATA-980] Fix for Is Not Null in presto
This closes #1128
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/bbb95ceb
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/bbb95ceb
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/bbb95ceb
Branch: refs/heads/datamap
Commit: bbb95ceb4041b818982214c395637c10d3904912
Parents: 0140a12
Author: jatin <ja...@knoldus.in>
Authored: Mon Jul 3 18:08:22 2017 +0530
Committer: chenliang613 <ch...@apache.org>
Committed: Mon Jul 3 23:35:07 2017 +0800
----------------------------------------------------------------------
.../org/apache/carbondata/presto/CarbondataRecordSetProvider.java | 1 -
1 file changed, 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/bbb95ceb/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataRecordSetProvider.java
----------------------------------------------------------------------
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataRecordSetProvider.java b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataRecordSetProvider.java
index 71649f3..4b7864d 100755
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataRecordSetProvider.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataRecordSetProvider.java
@@ -144,7 +144,6 @@ public class CarbondataRecordSetProvider implements ConnectorRecordSetProvider {
List<Object> singleValues = new ArrayList<>();
List<Expression> disjuncts = new ArrayList<>();
for (Range range : domain.getValues().getRanges().getOrderedRanges()) {
- checkState(!range.isAll()); // Already checked
if (range.isSingleValue()) {
singleValues.add(range.getLow().getValue());
} else {
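Editor's note: a hedged sketch of why the checkState was removed. An IS NOT NULL predicate maps to a Presto domain whose value set is a single all-spanning range, so range.isAll() can legitimately be true and the old assertion would throw. Assuming presto-spi's Domain.notNull factory:

import com.facebook.presto.spi.predicate.Domain;
import com.facebook.presto.spi.predicate.Range;
import com.facebook.presto.spi.type.BigintType;

public final class NotNullDomainSketch {
  public static void main(String[] args) {
    // col IS NOT NULL: every non-null value is admissible, only null is excluded
    Domain notNull = Domain.notNull(BigintType.BIGINT);
    for (Range range : notNull.getValues().getRanges().getOrderedRanges()) {
      // prints true: the single ordered range spans all values, which is
      // exactly the case the removed checkState(!range.isAll()) rejected
      System.out.println(range.isAll());
    }
  }
}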
[10/50] [abbrv] carbondata git commit: [CARBONDATA-1251] Add test
cases for IUD feature This closes #1120
Posted by ja...@apache.org.
[CARBONDATA-1251] Add test cases for IUD feature This closes #1120
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/ad800067
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/ad800067
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/ad800067
Branch: refs/heads/datamap
Commit: ad8000676b8aa994c6b70e738b3cf27604cbca4b
Parents: 28f8a0b 7e2e86e
Author: jackylk <ja...@huawei.com>
Authored: Fri Jun 30 20:34:46 2017 +0800
Committer: jackylk <ja...@huawei.com>
Committed: Fri Jun 30 20:34:46 2017 +0800
----------------------------------------------------------------------
.../iud/DeleteCarbonTableTestCase.scala | 31 ++++++++++++++++++--
.../iud/UpdateCarbonTableTestCase.scala | 2 ++
2 files changed, 30 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
[44/50] [abbrv] carbondata git commit: [CARBONDATA-1277] Dictionary
generation failure due to hdfs lease expiry
Posted by ja...@apache.org.
[CARBONDATA-1277] Dictionary generation failure due to hdfs lease expiry
This closes #1147
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/285ce72d
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/285ce72d
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/285ce72d
Branch: refs/heads/datamap
Commit: 285ce72d4c9b3364bbdc454f4b6b331b3caa42db
Parents: 8b31f09
Author: manishgupta88 <to...@gmail.com>
Authored: Sat Jul 8 15:46:25 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Tue Jul 11 18:00:18 2017 +0530
----------------------------------------------------------------------
.../core/constants/CarbonCommonConstants.java | 7 +
.../core/datastore/impl/FileFactory.java | 21 ++
.../AtomicFileOperationsImpl.java | 5 +-
.../apache/carbondata/core/util/CarbonUtil.java | 2 +-
.../core/util/path/HDFSLeaseUtils.java | 215 +++++++++++++++++++
.../core/writer/CarbonDictionaryWriterImpl.java | 20 +-
.../carbondata/core/writer/ThriftWriter.java | 2 +-
7 files changed, 266 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/285ce72d/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index 208bab8..8110abb 100644
--- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -1287,6 +1287,13 @@ public final class CarbonCommonConstants {
public static final String CARBON_BAD_RECORDS_ACTION_DEFAULT = "FORCE";
+ @CarbonProperty
+ public static final String CARBON_LEASE_RECOVERY_RETRY_COUNT =
+ "carbon.lease.recovery.retry.count";
+ @CarbonProperty
+ public static final String CARBON_LEASE_RECOVERY_RETRY_INTERVAL =
+ "carbon.lease.recovery.retry.interval";
+
private CarbonCommonConstants() {
}
}
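Editor's note: a hedged usage sketch for the two new keys (CarbonProperties.addProperty is the existing setter; the values 10 and 2000 are arbitrary). Out-of-range or non-numeric values fall back to the defaults of 5 retries and 1000 ms enforced in HDFSLeaseUtils later in this commit.

import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.util.CarbonProperties;

public final class LeaseRecoveryConfig {
  public static void main(String[] args) {
    CarbonProperties properties = CarbonProperties.getInstance();
    // retry recoverLease up to 10 times, waiting 2000 ms between attempts
    properties.addProperty(CarbonCommonConstants.CARBON_LEASE_RECOVERY_RETRY_COUNT, "10");
    properties.addProperty(CarbonCommonConstants.CARBON_LEASE_RECOVERY_RETRY_INTERVAL, "2000");
  }
}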
http://git-wip-us.apache.org/repos/asf/carbondata/blob/285ce72d/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java b/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java
index 7acd6b1..2a35ab3 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java
@@ -518,4 +518,25 @@ public final class FileFactory {
}
}
+ /**
+ * This method will create the path object for a given file
+ *
+ * @param filePath
+ * @return
+ */
+ public static Path getPath(String filePath) {
+ return new Path(filePath);
+ }
+
+ /**
+ * This method will return the filesystem instance
+ *
+ * @param path
+ * @return
+ * @throws IOException
+ */
+ public static FileSystem getFileSystem(Path path) throws IOException {
+ return path.getFileSystem(configuration);
+ }
+
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/285ce72d/core/src/main/java/org/apache/carbondata/core/fileoperations/AtomicFileOperationsImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/fileoperations/AtomicFileOperationsImpl.java b/core/src/main/java/org/apache/carbondata/core/fileoperations/AtomicFileOperationsImpl.java
index befc76e..61690ff 100644
--- a/core/src/main/java/org/apache/carbondata/core/fileoperations/AtomicFileOperationsImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/fileoperations/AtomicFileOperationsImpl.java
@@ -25,6 +25,7 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.datastore.impl.FileFactory.FileType;
+import org.apache.carbondata.core.util.CarbonUtil;
public class AtomicFileOperationsImpl implements AtomicFileOperations {
@@ -67,10 +68,8 @@ public class AtomicFileOperationsImpl implements AtomicFileOperations {
@Override public void close() throws IOException {
if (null != dataOutStream) {
- dataOutStream.close();
-
+ CarbonUtil.closeStream(dataOutStream);
CarbonFile tempFile = FileFactory.getCarbonFile(tempWriteFilePath, fileType);
-
if (!tempFile.renameForce(filePath)) {
throw new IOException("temporary file renaming failed, src="
+ tempFile.getPath() + ", dest=" + filePath);
http://git-wip-us.apache.org/repos/asf/carbondata/blob/285ce72d/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index 8298600..06b2a61 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -128,7 +128,7 @@ public final class CarbonUtil {
try {
closeStream(stream);
} catch (IOException e) {
- LOGGER.error("Error while closing stream:" + e);
+ LOGGER.error(e, "Error while closing stream:" + e);
}
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/285ce72d/core/src/main/java/org/apache/carbondata/core/util/path/HDFSLeaseUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/path/HDFSLeaseUtils.java b/core/src/main/java/org/apache/carbondata/core/util/path/HDFSLeaseUtils.java
new file mode 100644
index 0000000..c72c322
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/util/path/HDFSLeaseUtils.java
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.core.util.path;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import org.apache.carbondata.common.logging.LogService;
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.util.CarbonProperties;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.viewfs.ViewFileSystem;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
+
+/**
+ * Implementation for HDFS utility methods
+ */
+public class HDFSLeaseUtils {
+
+ private static final int CARBON_LEASE_RECOVERY_RETRY_COUNT_MIN = 1;
+ private static final int CARBON_LEASE_RECOVERY_RETRY_COUNT_MAX = 50;
+ private static final String CARBON_LEASE_RECOVERY_RETRY_COUNT_DEFAULT = "5";
+ private static final int CARBON_LEASE_RECOVERY_RETRY_INTERVAL_MIN = 1000;
+ private static final int CARBON_LEASE_RECOVERY_RETRY_INTERVAL_MAX = 10000;
+ private static final String CARBON_LEASE_RECOVERY_RETRY_INTERVAL_DEFAULT = "1000";
+
+ /**
+ * LOGGER
+ */
+ private static final LogService LOGGER =
+ LogServiceFactory.getLogService(HDFSLeaseUtils.class.getName());
+
+ /**
+ * This method will validate whether the exception thrown is for lease recovery from HDFS
+ *
+ * @param message
+ * @return
+ */
+ public static boolean checkExceptionMessageForLeaseRecovery(String message) {
+ // depending on the scenario, more cases can be added for validating a lease recovery exception
+ if (null != message && message.contains("Failed to APPEND_FILE")) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * This method will make attempts to recover lease on a file using the
+ * distributed file system utility.
+ *
+ * @param filePath
+ * @return
+ * @throws IOException
+ */
+ public static boolean recoverFileLease(String filePath) throws IOException {
+ LOGGER.info("Trying to recover lease on file: " + filePath);
+ FileFactory.FileType fileType = FileFactory.getFileType(filePath);
+ switch (fileType) {
+ case ALLUXIO:
+ case HDFS:
+ Path path = FileFactory.getPath(filePath);
+ FileSystem fs = FileFactory.getFileSystem(path);
+ return recoverLeaseOnFile(filePath, path, (DistributedFileSystem) fs);
+ case VIEWFS:
+ path = FileFactory.getPath(filePath);
+ fs = FileFactory.getFileSystem(path);
+ ViewFileSystem viewFileSystem = (ViewFileSystem) fs;
+ Path targetFileSystemPath = viewFileSystem.resolvePath(path);
+ FileSystem targetFileSystem = FileFactory.getFileSystem(targetFileSystemPath);
+ if (targetFileSystem instanceof DistributedFileSystem) {
+ return recoverLeaseOnFile(filePath, path, (DistributedFileSystem) targetFileSystem);
+ } else {
+ LOGGER.error(
+ "Invalid file type. Lease recovery is not supported on filesystem with file: "
+ + filePath);
+ return false;
+ }
+ default:
+ LOGGER.error("Invalid file type. Lease recovery is not supported on filesystem with file: "
+ + filePath);
+ return false;
+ }
+ }
+
+ /**
+ * Recovers lease on a file
+ *
+ * @param filePath
+ * @param path
+ * @param fs
+ * @return
+ * @throws IOException
+ */
+ private static boolean recoverLeaseOnFile(String filePath, Path path, DistributedFileSystem fs)
+ throws IOException {
+ DistributedFileSystem dfs = fs;
+ int maxAttempts = getLeaseRecoveryRetryCount();
+ int retryInterval = getLeaseRecoveryRetryInterval();
+ boolean leaseRecovered = false;
+ IOException ioException = null;
+ for (int retryCount = 1; retryCount <= maxAttempts; retryCount++) {
+ try {
+ leaseRecovered = dfs.recoverLease(path);
+ if (!leaseRecovered) {
+ try {
+ LOGGER.info(
+ "Failed to recover lease after attempt " + retryCount + " . Will try again after "
+ + retryInterval + " ms...");
+ Thread.sleep(retryInterval);
+ } catch (InterruptedException e) {
+ LOGGER.error(e,
+ "Interrupted exception occurred while recovering lease for file : " + filePath);
+ }
+ }
+ } catch (IOException e) {
+ if (e instanceof LeaseExpiredException && e.getMessage().contains("File does not exist")) {
+ LOGGER.error("The given file does not exist at path " + filePath);
+ throw e;
+ } else if (e instanceof FileNotFoundException) {
+ LOGGER.error("The given file does not exist at path " + filePath);
+ throw e;
+ } else {
+ LOGGER.error("Recover lease threw exception : " + e.getMessage());
+ ioException = e;
+ }
+ }
+ LOGGER.info("Retrying again after interval of " + retryInterval + " ms...");
+ }
+ if (leaseRecovered) {
+ LOGGER.info("Successfully able to recover lease on file: " + filePath);
+ return true;
+ } else {
+ LOGGER.error(
+ "Failed to recover lease on file: " + filePath + " after retrying for " + maxAttempts
+ + " at an interval of " + retryInterval);
+ if (null != ioException) {
+ throw ioException;
+ } else {
+ return false;
+ }
+ }
+ }
+
+ private static int getLeaseRecoveryRetryCount() {
+ String retryMaxAttempts = CarbonProperties.getInstance()
+ .getProperty(CarbonCommonConstants.CARBON_LEASE_RECOVERY_RETRY_COUNT,
+ CARBON_LEASE_RECOVERY_RETRY_COUNT_DEFAULT);
+ int retryCount = 0;
+ try {
+ retryCount = Integer.parseInt(retryMaxAttempts);
+ if (retryCount < CARBON_LEASE_RECOVERY_RETRY_COUNT_MIN
+ || retryCount > CARBON_LEASE_RECOVERY_RETRY_COUNT_MAX) {
+ retryCount = Integer.parseInt(CARBON_LEASE_RECOVERY_RETRY_COUNT_DEFAULT);
+ LOGGER.warn(
+ "value configured for " + CarbonCommonConstants.CARBON_LEASE_RECOVERY_RETRY_COUNT
+ + " is not in allowed range. Allowed range is >="
+ + CARBON_LEASE_RECOVERY_RETRY_COUNT_MIN + " and <="
+ + CARBON_LEASE_RECOVERY_RETRY_COUNT_MAX + ". Therefore considering default value: "
+ + retryCount);
+ }
+ } catch (NumberFormatException ne) {
+ retryCount = Integer.parseInt(CARBON_LEASE_RECOVERY_RETRY_COUNT_DEFAULT);
+ LOGGER.warn("value configured for " + CarbonCommonConstants.CARBON_LEASE_RECOVERY_RETRY_COUNT
+ + " is incorrect. Therefore considering default value: " + retryCount);
+ }
+ return retryCount;
+ }
+
+ private static int getLeaseRecoveryRetryInterval() {
+ String configuredInterval = CarbonProperties.getInstance()
+ .getProperty(CarbonCommonConstants.CARBON_LEASE_RECOVERY_RETRY_INTERVAL,
+ CARBON_LEASE_RECOVERY_RETRY_INTERVAL_DEFAULT);
+ int retryInterval = 0;
+ try {
+ retryInterval = Integer.parseInt(configuredInterval);
+ if (retryInterval < CARBON_LEASE_RECOVERY_RETRY_INTERVAL_MIN
+ || retryInterval > CARBON_LEASE_RECOVERY_RETRY_INTERVAL_MAX) {
+ retryInterval = Integer.parseInt(CARBON_LEASE_RECOVERY_RETRY_INTERVAL_DEFAULT);
+ LOGGER.warn(
+ "value configured for " + CarbonCommonConstants.CARBON_LEASE_RECOVERY_RETRY_INTERVAL
+ + " is not in allowed range. Allowed range is >="
+ + CARBON_LEASE_RECOVERY_RETRY_INTERVAL_MIN + " and <="
+ + CARBON_LEASE_RECOVERY_RETRY_INTERVAL_MAX
+ + ". Therefore considering default value (ms): " + retryInterval);
+ }
+ } catch (NumberFormatException ne) {
+ retryInterval = Integer.parseInt(CARBON_LEASE_RECOVERY_RETRY_INTERVAL_DEFAULT);
+ LOGGER.warn(
+ "value configured for " + CarbonCommonConstants.CARBON_LEASE_RECOVERY_RETRY_INTERVAL
+ + " is incorrect. Therefore considering default value (ms): " + retryInterval);
+ }
+ return retryInterval;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/285ce72d/core/src/main/java/org/apache/carbondata/core/writer/CarbonDictionaryWriterImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/writer/CarbonDictionaryWriterImpl.java b/core/src/main/java/org/apache/carbondata/core/writer/CarbonDictionaryWriterImpl.java
index 9de41e1..64ff202 100644
--- a/core/src/main/java/org/apache/carbondata/core/writer/CarbonDictionaryWriterImpl.java
+++ b/core/src/main/java/org/apache/carbondata/core/writer/CarbonDictionaryWriterImpl.java
@@ -37,6 +37,7 @@ import org.apache.carbondata.core.service.CarbonCommonFactory;
import org.apache.carbondata.core.service.PathService;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.path.CarbonTablePath;
+import org.apache.carbondata.core.util.path.HDFSLeaseUtils;
import org.apache.carbondata.format.ColumnDictionaryChunk;
import org.apache.carbondata.format.ColumnDictionaryChunkMeta;
@@ -359,7 +360,24 @@ public class CarbonDictionaryWriterImpl implements CarbonDictionaryWriter {
// create thrift writer instance
dictionaryThriftWriter = new ThriftWriter(dictionaryFile, true);
// open the file stream
- dictionaryThriftWriter.open();
+ try {
+ dictionaryThriftWriter.open();
+ } catch (IOException e) {
+ // Cases to handle
+ // 1. Handle File lease recovery
+ if (HDFSLeaseUtils.checkExceptionMessageForLeaseRecovery(e.getMessage())) {
+ LOGGER.error(e, "Lease recovery exception encountered for file: " + dictionaryFile);
+ boolean leaseRecovered = HDFSLeaseUtils.recoverFileLease(dictionaryFile);
+ if (leaseRecovered) {
+ // try to open output stream again after recovering the lease on file
+ dictionaryThriftWriter.open();
+ } else {
+ throw e;
+ }
+ } else {
+ throw e;
+ }
+ }
}
/**
http://git-wip-us.apache.org/repos/asf/carbondata/blob/285ce72d/core/src/main/java/org/apache/carbondata/core/writer/ThriftWriter.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/writer/ThriftWriter.java b/core/src/main/java/org/apache/carbondata/core/writer/ThriftWriter.java
index 9bf549d..d7b1a0f 100644
--- a/core/src/main/java/org/apache/carbondata/core/writer/ThriftWriter.java
+++ b/core/src/main/java/org/apache/carbondata/core/writer/ThriftWriter.java
@@ -136,7 +136,7 @@ public class ThriftWriter {
*/
public void close() throws IOException {
closeAtomicFileWriter();
- CarbonUtil.closeStreams(dataOutputStream);
+ CarbonUtil.closeStream(dataOutputStream);
}
/**
[22/50] [abbrv] carbondata git commit: [CARBONDATA-1254] Fixed
describe formatted for sort columns after alter #1123
Posted by ja...@apache.org.
[CARBONDATA-1254] Fixed describe formatted for sort columns after alter #1123
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0140a12e
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0140a12e
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0140a12e
Branch: refs/heads/datamap
Commit: 0140a12e6b34bbc36838221d1117553d3644bcea
Parents: 5f9741e e9329ee
Author: kumarvishal <ku...@gmail.com>
Authored: Mon Jul 3 20:31:45 2017 +0530
Committer: kumarvishal <ku...@gmail.com>
Committed: Mon Jul 3 20:31:45 2017 +0530
----------------------------------------------------------------------
.../core/metadata/schema/table/CarbonTable.java | 19 +++++++++++++++++++
.../execution/command/carbonTableSchema.scala | 6 +++---
.../execution/command/carbonTableSchema.scala | 6 +++---
.../AlterTableValidationTestCase.scala | 19 ++++++++++++++++++-
4 files changed, 43 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
[34/50] [abbrv] carbondata git commit: [CARBONDATA-1214] Changing the
delete syntax as in the hive for segment deletion
Posted by ja...@apache.org.
[CARBONDATA-1214] Changing the delete syntax as in the hive for segment deletion
This closes #1078
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/05de7fda
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/05de7fda
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/05de7fda
Branch: refs/heads/datamap
Commit: 05de7fdae5f02f172321af614532bbc331309fcb
Parents: 26d2f1c
Author: ravikiran23 <ra...@gmail.com>
Authored: Thu Jun 22 18:18:19 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Thu Jul 6 16:03:29 2017 +0530
----------------------------------------------------------------------
.../examples/DataManagementExample.scala | 4 ++--
.../MajorCompactionIgnoreInMinorTest.scala | 4 ++--
.../dataload/TestLoadTblNameIsKeyword.scala | 2 +-
.../DataRetentionConcurrencyTestCase.scala | 4 ++--
.../dataretention/DataRetentionTestCase.scala | 24 ++++++++++----------
.../iud/HorizontalCompactionTestCase.scala | 2 +-
.../spark/sql/catalyst/CarbonDDLSqlParser.scala | 1 +
.../org/apache/spark/sql/CarbonSqlParser.scala | 21 ++++++++---------
.../DataCompactionNoDictionaryTest.scala | 2 +-
.../sql/parser/CarbonSpark2SqlParser.scala | 18 +++++++--------
10 files changed, 39 insertions(+), 43 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/05de7fda/examples/spark/src/main/scala/org/apache/carbondata/examples/DataManagementExample.scala
----------------------------------------------------------------------
diff --git a/examples/spark/src/main/scala/org/apache/carbondata/examples/DataManagementExample.scala b/examples/spark/src/main/scala/org/apache/carbondata/examples/DataManagementExample.scala
index 4552e06..551a008 100644
--- a/examples/spark/src/main/scala/org/apache/carbondata/examples/DataManagementExample.scala
+++ b/examples/spark/src/main/scala/org/apache/carbondata/examples/DataManagementExample.scala
@@ -45,7 +45,7 @@ object DataManagementExample {
cc.sql("SHOW SEGMENTS FOR TABLE t3 ").show
// delete the first segment
- cc.sql("DELETE SEGMENT 0 FROM TABLE t3")
+ cc.sql("DELETE FROM TABLE T3 WHERE SEGMENT.ID IN (0)")
cc.sql("SHOW SEGMENTS FOR TABLE t3 LIMIT 10").show
// this query will be executed on last 4 segments, it should return 4000 rows
@@ -63,7 +63,7 @@ object DataManagementExample {
cc.sql("SELECT count(*) AS amount FROM t3").show
// delete all segments whose loading time is before '2099-01-01 01:00:00'
- cc.sql("DELETE SEGMENTS FROM TABLE t3 WHERE STARTTIME BEFORE '2099-01-01 01:00:00'")
+ cc.sql("DELETE FROM TABLE T3 WHERE SEGMENT.STARTTIME BEFORE '2099-01-01 01:00:00'")
cc.sql("SHOW SEGMENTS FOR TABLE t3 ").show
// this query will be executed on 0 segments, it should return 0 rows
http://git-wip-us.apache.org/repos/asf/carbondata/blob/05de7fda/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/MajorCompactionIgnoreInMinorTest.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/MajorCompactionIgnoreInMinorTest.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/MajorCompactionIgnoreInMinorTest.scala
index 9d2cf96..b66e37b 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/MajorCompactionIgnoreInMinorTest.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/MajorCompactionIgnoreInMinorTest.scala
@@ -103,7 +103,7 @@ class MajorCompactionIgnoreInMinorTest extends QueryTest with BeforeAndAfterAll
*/
test("delete compacted segment and check status") {
try {
- sql("delete segment 2 from table ignoremajor")
+ sql("delete from table ignoremajor where segment.id in (2)")
assert(false)
}
catch {
@@ -128,7 +128,7 @@ class MajorCompactionIgnoreInMinorTest extends QueryTest with BeforeAndAfterAll
*/
test("delete compacted segment by date and check status") {
sql(
- "DELETE SEGMENTS FROM TABLE ignoremajor where STARTTIME before" +
+ "delete from table ignoremajor where segment.starttime before " +
" '2222-01-01 19:35:01'"
)
val carbontablePath = CarbonStorePath
http://git-wip-us.apache.org/repos/asf/carbondata/blob/05de7fda/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadTblNameIsKeyword.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadTblNameIsKeyword.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadTblNameIsKeyword.scala
index cadaa6e..71aeb99 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadTblNameIsKeyword.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataload/TestLoadTblNameIsKeyword.scala
@@ -78,7 +78,7 @@ class TestLoadTblNameIsKeyword extends QueryTest with BeforeAndAfterAll {
LOAD DATA LOCAL INPATH '$testData' into table timestamp
""")
sql("show segments for table timestamp")
- sql("delete segments from table timestamp where starttime before '2099-10-01 18:00:00'")
+ sql("delete from table timestamp where segment.starttime before '2099-10-01 18:00:00'")
sql("clean files for table timestamp")
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/05de7fda/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataretention/DataRetentionConcurrencyTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataretention/DataRetentionConcurrencyTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataretention/DataRetentionConcurrencyTestCase.scala
index 784382b..79350eb 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataretention/DataRetentionConcurrencyTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataretention/DataRetentionConcurrencyTestCase.scala
@@ -57,7 +57,7 @@ class DataRetentionConcurrencyTestCase extends QueryTest with BeforeAndAfterAll
val tasks = new util.ArrayList[Callable[String]]()
tasks
.add(new QueryTask(s"LOAD DATA LOCAL INPATH '$resourcesPath/dataretention1.csv' INTO TABLE concurrent OPTIONS('DELIMITER' = ',')"))
- tasks.add(new QueryTask("Delete segment 0 from table concurrent"))
+ tasks.add(new QueryTask("delete from table concurrent where segment.id in (0)"))
tasks.add(new QueryTask("clean files for table concurrent"))
val results = executorService.invokeAll(tasks)
for (i <- 0 until tasks.size()) {
@@ -77,7 +77,7 @@ class DataRetentionConcurrencyTestCase extends QueryTest with BeforeAndAfterAll
.add(new QueryTask(s"LOAD DATA LOCAL INPATH '$resourcesPath/dataretention1.csv' INTO TABLE concurrent OPTIONS('DELIMITER' = ',')"))
tasks
.add(new QueryTask(
- "DELETE SEGMENTS FROM TABLE concurrent where STARTTIME before '2099-01-01 00:00:00'"))
+ "delete from table concurrent where segment.starttime before '2099-01-01 00:00:00'"))
tasks.add(new QueryTask("clean files for table concurrent"))
val results = executorService.invokeAll(tasks)
for (i <- 0 until tasks.size()) {
http://git-wip-us.apache.org/repos/asf/carbondata/blob/05de7fda/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataretention/DataRetentionTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataretention/DataRetentionTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataretention/DataRetentionTestCase.scala
index 684ed8e..b255099 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataretention/DataRetentionTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/dataretention/DataRetentionTestCase.scala
@@ -131,7 +131,7 @@ class DataRetentionTestCase extends QueryTest with BeforeAndAfterAll {
val actualValue: String = getSegmentStartTime(segments, 1)
// delete segments (0,1) which contains ind, aus
sql(
- "DELETE SEGMENTS FROM TABLE DataRetentionTable where STARTTIME before '" + actualValue + "'")
+ "delete from table DataRetentionTable where segment.starttime before '" + actualValue + "'")
// load segment 2 which contains eng
sql(
@@ -147,7 +147,7 @@ class DataRetentionTestCase extends QueryTest with BeforeAndAfterAll {
test("RetentionTest3_DeleteByLoadId") {
// delete segment 2 and load ind segment
- sql("DELETE SEGMENT 2 FROM TABLE DataRetentionTable")
+ sql("delete from table DataRetentionTable where segment.id in (2)")
sql(
s"LOAD DATA LOCAL INPATH '$resourcesPath/dataretention1.csv' INTO TABLE DataRetentionTable " +
"OPTIONS('DELIMITER' = ',')")
@@ -166,7 +166,7 @@ class DataRetentionTestCase extends QueryTest with BeforeAndAfterAll {
test("RetentionTest4_DeleteByInvalidLoadId") {
try {
// delete segment with no id
- sql("DELETE SEGMENT FROM TABLE DataRetentionTable")
+ sql("delete from table DataRetentionTable where segment.id in ()")
assert(false)
} catch {
case e: MalformedCarbonCommandException =>
@@ -191,8 +191,8 @@ class DataRetentionTestCase extends QueryTest with BeforeAndAfterAll {
checkAnswer(
sql("select count(*) from carbon_table_1"), Seq(Row(20)))
- sql("delete segments from table carbon_table_1 " +
- "where starttime before '2099-07-28 11:00:00'")
+ sql("delete from table carbon_table_1 where segment.starttime " +
+ " before '2099-07-28 11:00:00'")
checkAnswer(
sql("select count(*) from carbon_table_1"), Seq(Row(0)))
@@ -204,7 +204,7 @@ class DataRetentionTestCase extends QueryTest with BeforeAndAfterAll {
try {
sql(
- "DELETE SEGMENTS FROM TABLE DataRetentionTable where STARTTIME before" +
+ "delete from table DataRetentionTable where segment.starttime before" +
" 'abcd-01-01 00:00:00'")
assert(false)
} catch {
@@ -215,7 +215,7 @@ class DataRetentionTestCase extends QueryTest with BeforeAndAfterAll {
try {
sql(
- "DELETE SEGMENTS FROM TABLE DataRetentionTable where STARTTIME before" +
+ "delete from table DataRetentionTable where segment.starttime before" +
" '2099:01:01 00:00:00'")
assert(false)
} catch {
@@ -230,7 +230,7 @@ class DataRetentionTestCase extends QueryTest with BeforeAndAfterAll {
),
Seq(Row("ind", 9))
)
- sql("DELETE SEGMENTS FROM TABLE DataRetentionTable where STARTTIME before '2099-01-01'")
+ sql("delete from table DataRetentionTable where segment.starttime before '2099-01-01'")
checkAnswer(
sql("SELECT country, count(salary) AS amount FROM DataRetentionTable WHERE country" +
" IN ('china','ind','aus','eng') GROUP BY country"), Seq())
@@ -280,7 +280,7 @@ class DataRetentionTestCase extends QueryTest with BeforeAndAfterAll {
// delete segment 0 it should fail
try {
- sql("DELETE SEGMENT 0 FROM TABLE retentionlock")
+ sql("delete from table retentionlock where segment.id in (0)")
throw new MalformedCarbonCommandException("Invalid")
} catch {
case me: MalformedCarbonCommandException =>
@@ -291,7 +291,7 @@ class DataRetentionTestCase extends QueryTest with BeforeAndAfterAll {
// it should fail
try {
- sql("DELETE SEGMENTS FROM TABLE retentionlock where STARTTIME before " +
+ sql("delete from table retentionlock where segment.starttime before " +
"'2099-01-01 00:00:00.0'")
throw new MalformedCarbonCommandException("Invalid")
} catch {
@@ -317,10 +317,10 @@ class DataRetentionTestCase extends QueryTest with BeforeAndAfterAll {
carbonCleanFilesLock.unlock()
carbonDeleteSegmentLock.unlock()
- sql("DELETE SEGMENT 0 FROM TABLE retentionlock")
+ sql("delete from table retentionlock where segment.id in (0)")
//load and delete should execute parallely
carbonMetadataLock.lockWithRetries()
- sql("DELETE SEGMENT 1 FROM TABLE retentionlock")
+ sql("delete from table retentionlock where segment.id in (1)")
carbonMetadataLock.unlock()
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/05de7fda/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/HorizontalCompactionTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/HorizontalCompactionTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/HorizontalCompactionTestCase.scala
index d8310da..0d30333 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/HorizontalCompactionTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/iud/HorizontalCompactionTestCase.scala
@@ -321,7 +321,7 @@ class HorizontalCompactionTestCase extends QueryTest with BeforeAndAfterAll {
sql(s"""load data local inpath '$resourcesPath/IUD/comp4.csv' INTO table dest2""")
sql(
"""delete from dest2 where (c2 < 3) or (c2 > 10 and c2 < 13) or (c2 > 20 and c2 < 23) or (c2 > 30 and c2 < 33)""").show()
- sql("""DELETE SEGMENT 0 FROM TABLE dest2""")
+ sql("""delete from table dest2 where segment.id in (0) """)
sql("""clean files for table dest2""")
sql(
"""update dest2 set (c5) = ('8RAM size') where (c2 > 3 and c2 < 5) or (c2 > 13 and c2 < 15) or (c2 > 23 and c2 < 25) or (c2 > 33 and c2 < 35)""")
http://git-wip-us.apache.org/repos/asf/carbondata/blob/05de7fda/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
index 8207a9d..4dbdc8d 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/catalyst/CarbonDDLSqlParser.scala
@@ -164,6 +164,7 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
protected val SMALLINT = carbonKeyWord("SMALLINT")
protected val CHANGE = carbonKeyWord("CHANGE")
protected val TBLPROPERTIES = carbonKeyWord("TBLPROPERTIES")
+ protected val ID = carbonKeyWord("ID")
protected val doubleQuotedString = "\"([^\"]+)\"".r
protected val singleQuotedString = "'([^']+)'".r
http://git-wip-us.apache.org/repos/asf/carbondata/blob/05de7fda/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
index a664104..9dc9ee2 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
@@ -417,23 +417,20 @@ class CarbonSqlParser() extends CarbonDDLSqlParser {
}
protected lazy val deleteLoadsByID: Parser[LogicalPlan] =
- DELETE ~> SEGMENT ~> repsep(segmentId, ",") ~ (FROM ~> TABLE ~>
- (ident <~ ".").? ~ ident) <~
- opt(";") ^^ {
- case loadids ~ table => table match {
- case databaseName ~ tableName =>
- DeleteLoadsById(loadids, convertDbNameToLowerCase(databaseName), tableName.toLowerCase())
- }
- }
+ DELETE ~> FROM ~ TABLE ~> (ident <~ ".").? ~ ident ~
+ (WHERE ~> (SEGMENT ~ "." ~ ID) ~> IN ~> "(" ~> repsep(segmentId, ",")) <~ ")" ~ opt(";") ^^ {
+ case dbName ~ tableName ~ loadids =>
+ DeleteLoadsById(loadids, convertDbNameToLowerCase(dbName), tableName.toLowerCase())
+ }
protected lazy val deleteLoadsByLoadDate: Parser[LogicalPlan] =
- DELETE ~> SEGMENTS ~> FROM ~> TABLE ~> (ident <~ ".").? ~ ident ~
- (WHERE ~> (STARTTIME <~ BEFORE) ~ stringLit) <~
+ DELETE ~> FROM ~> TABLE ~> (ident <~ ".").? ~ ident ~
+ (WHERE ~> (SEGMENT ~ "." ~ STARTTIME ~> BEFORE) ~ stringLit) <~
opt(";") ^^ {
- case schema ~ table ~ condition =>
+ case database ~ table ~ condition =>
condition match {
case dateField ~ dateValue =>
- DeleteLoadsByLoadDate(convertDbNameToLowerCase(schema),
+ DeleteLoadsByLoadDate(convertDbNameToLowerCase(database),
table.toLowerCase(),
dateField,
dateValue)
http://git-wip-us.apache.org/repos/asf/carbondata/blob/05de7fda/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/DataCompactionNoDictionaryTest.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/DataCompactionNoDictionaryTest.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/DataCompactionNoDictionaryTest.scala
index dda2a88..5897681 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/DataCompactionNoDictionaryTest.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/DataCompactionNoDictionaryTest.scala
@@ -156,7 +156,7 @@ class DataCompactionNoDictionaryTest extends QueryTest with BeforeAndAfterAll {
sql("LOAD DATA LOCAL INPATH '" + csvFilePath1 + "' INTO TABLE nodictionaryCompaction " +
"OPTIONS('DELIMITER' = ',')"
)
- sql("DELETE segment 0.1,3 FROM TABLE nodictionaryCompaction")
+ sql("delete from table nodictionaryCompaction where segment.id in (0.1,3)")
checkAnswer(
sql("select country from nodictionaryCompaction"),
Seq()
http://git-wip-us.apache.org/repos/asf/carbondata/blob/05de7fda/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala
index af286eb..511a61c 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala
@@ -220,23 +220,21 @@ class CarbonSpark2SqlParser extends CarbonDDLSqlParser {
}
protected lazy val deleteLoadsByID: Parser[LogicalPlan] =
- DELETE ~> SEGMENT ~> repsep(segmentId, ",") ~ (FROM ~> TABLE ~>
- (ident <~ ".").? ~ ident) <~
+ DELETE ~> FROM ~ TABLE ~> (ident <~ ".").? ~ ident ~
+ (WHERE ~> (SEGMENT ~ "." ~ ID) ~> IN ~> "(" ~> repsep(segmentId, ",")) <~ ")" ~
opt(";") ^^ {
- case loadids ~ table => table match {
- case databaseName ~ tableName =>
- DeleteLoadsById(loadids, convertDbNameToLowerCase(databaseName), tableName.toLowerCase())
- }
+ case dbName ~ tableName ~ loadids =>
+ DeleteLoadsById(loadids, dbName, tableName.toLowerCase())
}
protected lazy val deleteLoadsByLoadDate: Parser[LogicalPlan] =
- DELETE ~> SEGMENTS ~> FROM ~> TABLE ~> (ident <~ ".").? ~ ident ~
- (WHERE ~> (STARTTIME <~ BEFORE) ~ stringLit) <~
+ DELETE ~> FROM ~> TABLE ~> (ident <~ ".").? ~ ident ~
+ (WHERE ~> (SEGMENT ~ "." ~ STARTTIME ~> BEFORE) ~ stringLit) <~
opt(";") ^^ {
- case schema ~ table ~ condition =>
+ case database ~ table ~ condition =>
condition match {
case dateField ~ dateValue =>
- DeleteLoadsByLoadDate(convertDbNameToLowerCase(schema),
+ DeleteLoadsByLoadDate(convertDbNameToLowerCase(database),
table.toLowerCase(),
dateField,
dateValue)
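Editor's note: in summary, this commit replaces the old segment-deletion DDL with a Hive-style DELETE, as exercised throughout the updated tests (table name t3 is just the example's):

Old: DELETE SEGMENT 0 FROM TABLE t3
New: DELETE FROM TABLE t3 WHERE SEGMENT.ID IN (0)

Old: DELETE SEGMENTS FROM TABLE t3 WHERE STARTTIME BEFORE '2099-01-01 01:00:00'
New: DELETE FROM TABLE t3 WHERE SEGMENT.STARTTIME BEFORE '2099-01-01 01:00:00'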
[07/50] [abbrv] carbondata git commit: [CARBONDATA-1236] Support
absolute path without scheme in loading - change in logic
Posted by ja...@apache.org.
[CARBONDATA-1236] Support absolute path without scheme in loading - change in logic
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c671c5b6
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c671c5b6
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c671c5b6
Branch: refs/heads/datamap
Commit: c671c5b60ad4989643e9b6f92540b60c2c540145
Parents: 126a041
Author: dhatchayani <dh...@gmail.com>
Authored: Thu Jun 29 17:59:33 2017 +0530
Committer: dhatchayani <dh...@gmail.com>
Committed: Thu Jun 29 18:02:50 2017 +0530
----------------------------------------------------------------------
.../apache/carbondata/core/util/CarbonUtil.java | 50 +++++------
.../carbondata/core/util/CarbonUtilTest.java | 89 +++++++++++++++++++-
2 files changed, 112 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c671c5b6/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index 6e0348b..8298600 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -697,32 +697,32 @@ public final class CarbonUtil {
*/
public static String checkAndAppendHDFSUrl(String filePath) {
String currentPath = filePath;
- if (null != filePath && filePath.length() != 0
- && FileFactory.getFileType(filePath) != FileFactory.FileType.HDFS
- && FileFactory.getFileType(filePath) != FileFactory.FileType.VIEWFS) {
- if (!filePath.startsWith("/")) {
- filePath = "/" + filePath;
- }
- String baseDFSUrl = CarbonProperties.getInstance()
- .getProperty(CarbonCommonConstants.CARBON_DDL_BASE_HDFS_URL);
- String dfsUrl = conf.get(FS_DEFAULT_FS);
- if (null != baseDFSUrl) {
- if (!baseDFSUrl.startsWith("/")) {
- baseDFSUrl = "/" + baseDFSUrl;
- }
- if (null != dfsUrl && (dfsUrl.startsWith(HDFS_PREFIX) || dfsUrl
- .startsWith(VIEWFS_PREFIX))) {
- baseDFSUrl = dfsUrl + baseDFSUrl;
- }
- if (baseDFSUrl.endsWith("/")) {
- baseDFSUrl = baseDFSUrl.substring(0, baseDFSUrl.length() - 1);
- }
- currentPath = baseDFSUrl + filePath;
- } else {
- currentPath = dfsUrl + filePath;
- }
+ String defaultFsUrl = conf.get(FS_DEFAULT_FS);
+ String baseDFSUrl = CarbonProperties.getInstance()
+ .getProperty(CarbonCommonConstants.CARBON_DDL_BASE_HDFS_URL, "");
+ if (checkIfPrefixExists(filePath)) {
+ return currentPath;
}
- return currentPath;
+ if (baseDFSUrl.endsWith("/")) {
+ baseDFSUrl = baseDFSUrl.substring(0, baseDFSUrl.length() - 1);
+ }
+ if (!filePath.startsWith("/")) {
+ filePath = "/" + filePath;
+ }
+ currentPath = baseDFSUrl + filePath;
+ if (checkIfPrefixExists(currentPath)) {
+ return currentPath;
+ }
+ if (defaultFsUrl == null) {
+ return currentPath;
+ }
+ return defaultFsUrl + currentPath;
+ }
+
+ private static boolean checkIfPrefixExists(String path) {
+ final String lowerPath = path.toLowerCase();
+ return lowerPath.startsWith(HDFS_PREFIX) || lowerPath.startsWith(VIEWFS_PREFIX) || lowerPath
+ .startsWith("file://") || lowerPath.startsWith(ALLUXIO_PREFIX);
}
public static String getCarbonStorePath() {
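To make the new resolution order easier to follow, here is a self-contained sketch with the constants inlined; the class and method names are illustrative, but the expected outputs mirror the updated unit tests below.

public class PathResolutionSketch {
  private static final String HDFS_PREFIX = "hdfs://";
  private static final String VIEWFS_PREFIX = "viewfs://";
  private static final String ALLUXIO_PREFIX = "alluxio://";

  // Mirrors checkIfPrefixExists: a path that already carries a known
  // scheme is returned untouched.
  private static boolean hasKnownPrefix(String path) {
    String lower = path.toLowerCase();
    return lower.startsWith(HDFS_PREFIX) || lower.startsWith(VIEWFS_PREFIX)
        || lower.startsWith("file://") || lower.startsWith(ALLUXIO_PREFIX);
  }

  // Simplified model of the rewritten checkAndAppendHDFSUrl:
  // 1) keep fully qualified paths as-is,
  // 2) prepend the configured DDL base URL,
  // 3) prepend fs.defaultFS only if the result is still scheme-less.
  static String resolve(String filePath, String baseDfsUrl, String defaultFsUrl) {
    if (hasKnownPrefix(filePath)) {
      return filePath;
    }
    if (baseDfsUrl.endsWith("/")) {
      baseDfsUrl = baseDfsUrl.substring(0, baseDfsUrl.length() - 1);
    }
    if (!filePath.startsWith("/")) {
      filePath = "/" + filePath;
    }
    String currentPath = baseDfsUrl + filePath;
    if (hasKnownPrefix(currentPath) || defaultFsUrl == null) {
      return currentPath;
    }
    return defaultFsUrl + currentPath;
  }

  public static void main(String[] args) {
    System.out.println(resolve("hdfs://ha/core/t", "", "hdfs://"));      // hdfs://ha/core/t
    System.out.println(resolve("/core/t", "/opt/", "hdfs://"));          // hdfs:///opt/core/t
    System.out.println(resolve("/core/t", "hdfs://ha/opt/", "hdfs://")); // hdfs://ha/opt/core/t
  }
}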
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c671c5b6/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java b/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
index b42c57e..776059f 100644
--- a/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
+++ b/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java
@@ -342,7 +342,7 @@ public class CarbonUtilTest {
}
};
String hdfsURL = CarbonUtil.checkAndAppendHDFSUrl("../core/src/test/resources/testDatabase");
- assertEquals(hdfsURL, "/BASE_URL/../core/src/test/resources/testDatabase");
+ assertEquals(hdfsURL, "file:///BASE_URL/../core/src/test/resources/testDatabase");
}
@Test public void testToCheckAndAppendHDFSUrlWithBlackSlash() {
@@ -357,7 +357,7 @@ public class CarbonUtilTest {
}
};
String hdfsURL = CarbonUtil.checkAndAppendHDFSUrl("../core/src/test/resources/testDatabase");
- assertEquals(hdfsURL, "/BASE_URL/../core/src/test/resources/testDatabase");
+ assertEquals(hdfsURL, "file:///BASE_URL/../core/src/test/resources/testDatabase");
}
@Test public void testToCheckAndAppendHDFSUrlWithNull() {
@@ -375,6 +375,91 @@ public class CarbonUtilTest {
assertEquals(hdfsURL, "file:////../core/src/test/resources/testDatabase");
}
+ @Test public void testToCheckAndAppendHDFSUrlWithHdfs() {
+ new MockUp<FileFactory>() {
+ @SuppressWarnings("unused") @Mock public FileFactory.FileType getFileType(String path) {
+ return FileFactory.FileType.HDFS;
+ }
+ };
+ new MockUp<org.apache.hadoop.conf.Configuration>() {
+ @SuppressWarnings("unused") @Mock public String get(String name) {
+ return "hdfs://";
+ }
+ };
+ String hdfsURL = CarbonUtil.checkAndAppendHDFSUrl("hdfs://ha/core/src/test/resources/testDatabase");
+ assertEquals(hdfsURL, "hdfs://ha/core/src/test/resources/testDatabase");
+ }
+
+ @Test public void testToCheckAndAppendHDFSUrlWithDoubleSlashLocal() {
+ new MockUp<FileFactory>() {
+ @SuppressWarnings("unused") @Mock public FileFactory.FileType getFileType(String path) {
+ return FileFactory.FileType.LOCAL;
+ }
+ };
+ new MockUp<CarbonProperties>() {
+ @SuppressWarnings("unused") @Mock public String getProperty(String key) {
+ return "/opt/";
+ }
+ };
+ String hdfsURL = CarbonUtil.checkAndAppendHDFSUrl("/core/src/test/resources/testDatabase");
+ assertEquals(hdfsURL, "file:////opt/core/src/test/resources/testDatabase");
+ }
+
+ @Test public void testToCheckAndAppendHDFSUrlWithDoubleSlashHDFS() {
+ new MockUp<FileFactory>() {
+ @SuppressWarnings("unused") @Mock public FileFactory.FileType getFileType(String path) {
+ return FileFactory.FileType.HDFS;
+ }
+ };
+ new MockUp<org.apache.hadoop.conf.Configuration>() {
+ @SuppressWarnings("unused") @Mock public String get(String name) {
+ return "hdfs://";
+ }
+ };
+ new MockUp<CarbonProperties>() {
+ @SuppressWarnings("unused") @Mock public String getProperty(String key) {
+ return "/opt/";
+ }
+ };
+ String hdfsURL = CarbonUtil.checkAndAppendHDFSUrl("/core/src/test/resources/testDatabase");
+ assertEquals(hdfsURL, "hdfs:///opt/core/src/test/resources/testDatabase");
+ }
+
+ @Test public void testToCheckAndAppendHDFSUrlWithBaseURLPrefix() {
+ new MockUp<FileFactory>() {
+ @SuppressWarnings("unused") @Mock public FileFactory.FileType getFileType(String path) {
+ return FileFactory.FileType.HDFS;
+ }
+ };
+ new MockUp<CarbonProperties>() {
+ @SuppressWarnings("unused") @Mock public String getProperty(String key) {
+ return "hdfs://ha/opt/";
+ }
+ };
+ String hdfsURL = CarbonUtil.checkAndAppendHDFSUrl("/core/src/test/resources/testDatabase");
+ assertEquals(hdfsURL, "hdfs://ha/opt/core/src/test/resources/testDatabase");
+ }
+
+ @Test public void testToCheckAndAppendHDFSUrlWithBaseURLFile() {
+ new MockUp<FileFactory>() {
+ @SuppressWarnings("unused") @Mock public FileFactory.FileType getFileType(String path) {
+ return FileFactory.FileType.HDFS;
+ }
+ };
+ new MockUp<CarbonProperties>() {
+ @SuppressWarnings("unused") @Mock public String getProperty(String key) {
+ return "file:///";
+ }
+ };
+ String hdfsURL = CarbonUtil.checkAndAppendHDFSUrl("/core/src/test/resources/testDatabase");
+ assertEquals(hdfsURL, "file:///core/src/test/resources/testDatabase");
+ }
+
+ @Test public void testToCheckAndAppendHDFSUrlWithFilepathPrefix() {
+ String hdfsURL = CarbonUtil.checkAndAppendHDFSUrl("file:///core/src/test/resources/testDatabase");
+ assertEquals(hdfsURL, "file:///core/src/test/resources/testDatabase");
+ }
+
@Test public void testForisFileExists() {
assertTrue(CarbonUtil.isFileExists("../core/src/test/resources/testFile.txt"));
}
[30/50] [abbrv] carbondata git commit: fix null pointer exception by
changing null to empty array
Posted by ja...@apache.org.
fix null pointer exception by changing null to empty array
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/659036fe
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/659036fe
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/659036fe
Branch: refs/heads/datamap
Commit: 659036fee8dd1645eea31eeab0423bd0d5c03f19
Parents: 49c64f7
Author: Jin Guodong <je...@gmail.com>
Authored: Thu Jun 29 12:43:51 2017 +0800
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Wed Jul 5 11:21:47 2017 +0530
----------------------------------------------------------------------
.../carbondata/core/datastore/filesystem/AlluxioCarbonFile.java | 2 +-
.../carbondata/core/datastore/filesystem/HDFSCarbonFile.java | 2 +-
.../carbondata/core/datastore/filesystem/LocalCarbonFile.java | 2 +-
.../carbondata/core/datastore/filesystem/ViewFSCarbonFile.java | 2 +-
.../core/datastore/filesystem/AlluxioCarbonFileTest.java | 3 ++-
.../carbondata/core/datastore/filesystem/HDFSCarbonFileTest.java | 3 ++-
.../carbondata/core/datastore/filesystem/LocalCarbonFileTest.java | 3 ++-
.../core/datastore/filesystem/ViewFsCarbonFileTest.java | 3 ++-
8 files changed, 12 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/659036fe/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AlluxioCarbonFile.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AlluxioCarbonFile.java b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AlluxioCarbonFile.java
index 36e4868..c3ccd0c 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AlluxioCarbonFile.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AlluxioCarbonFile.java
@@ -74,7 +74,7 @@ public class AlluxioCarbonFile extends AbstractDFSCarbonFile {
Path path = fileStatus.getPath();
listStatus = path.getFileSystem(FileFactory.getConfiguration()).listStatus(path);
} else {
- return null;
+ return new CarbonFile[0];
}
} catch (IOException e) {
LOGGER.error("Exception occured: " + e.getMessage());
http://git-wip-us.apache.org/repos/asf/carbondata/blob/659036fe/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/HDFSCarbonFile.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/HDFSCarbonFile.java b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/HDFSCarbonFile.java
index 75d9fc3..19bdc7b 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/HDFSCarbonFile.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/HDFSCarbonFile.java
@@ -72,7 +72,7 @@ public class HDFSCarbonFile extends AbstractDFSCarbonFile {
Path path = fileStatus.getPath();
listStatus = path.getFileSystem(FileFactory.getConfiguration()).listStatus(path);
} else {
- return null;
+ return new CarbonFile[0];
}
} catch (IOException e) {
LOGGER.error("Exception occured: " + e.getMessage());
http://git-wip-us.apache.org/repos/asf/carbondata/blob/659036fe/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFile.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFile.java b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFile.java
index 9f90713..89a5cd3 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFile.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFile.java
@@ -123,7 +123,7 @@ public class LocalCarbonFile implements CarbonFile {
@Override public CarbonFile[] listFiles() {
if (!file.isDirectory()) {
- return null;
+ return new CarbonFile[0];
}
File[] files = file.listFiles();
if (files == null) {
http://git-wip-us.apache.org/repos/asf/carbondata/blob/659036fe/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/ViewFSCarbonFile.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/ViewFSCarbonFile.java b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/ViewFSCarbonFile.java
index da15371..e05112d 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/ViewFSCarbonFile.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/ViewFSCarbonFile.java
@@ -71,7 +71,7 @@ public class ViewFSCarbonFile extends AbstractDFSCarbonFile {
Path path = fileStatus.getPath();
listStatus = path.getFileSystem(FileFactory.getConfiguration()).listStatus(path);
} else {
- return null;
+ return new CarbonFile[0];
}
} catch (IOException ex) {
LOGGER.error("Exception occured" + ex.getMessage());
http://git-wip-us.apache.org/repos/asf/carbondata/blob/659036fe/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/AlluxioCarbonFileTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/AlluxioCarbonFileTest.java b/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/AlluxioCarbonFileTest.java
index eff0d8b..f55cd67 100644
--- a/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/AlluxioCarbonFileTest.java
+++ b/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/AlluxioCarbonFileTest.java
@@ -35,6 +35,7 @@ import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
+import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@@ -90,7 +91,7 @@ public class AlluxioCarbonFileTest {
@Test
public void testListFilesWithOutDirectoryPermission() {
alluxioCarbonFile = new AlluxioCarbonFile(fileStatusWithOutDirectoryPermission);
- assertTrue(alluxioCarbonFile.listFiles() == null);
+ assertArrayEquals(alluxioCarbonFile.listFiles(), new CarbonFile[0]);
}
@Test
http://git-wip-us.apache.org/repos/asf/carbondata/blob/659036fe/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/HDFSCarbonFileTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/HDFSCarbonFileTest.java b/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/HDFSCarbonFileTest.java
index eeecb03..7726693 100644
--- a/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/HDFSCarbonFileTest.java
+++ b/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/HDFSCarbonFileTest.java
@@ -39,6 +39,7 @@ import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
+import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -132,7 +133,7 @@ public class HDFSCarbonFileTest {
}
};
- assertEquals(hdfsCarbonFile.listFiles(), null);
+ assertArrayEquals(hdfsCarbonFile.listFiles(), new CarbonFile[0]);
}
@Test
http://git-wip-us.apache.org/repos/asf/carbondata/blob/659036fe/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFileTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFileTest.java b/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFileTest.java
index 3925e9e..9ae5c34 100644
--- a/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFileTest.java
+++ b/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFileTest.java
@@ -30,6 +30,7 @@ import java.io.*;
import java.nio.channels.ReadableByteChannel;
import java.util.Objects;
+import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@@ -102,7 +103,7 @@ public class LocalCarbonFileTest {
return false;
}
};
- assertTrue(localCarbonFile.listFiles() == null);
+ assertArrayEquals(localCarbonFile.listFiles(), new CarbonFile[0]);
}
@Test
http://git-wip-us.apache.org/repos/asf/carbondata/blob/659036fe/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/ViewFsCarbonFileTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/ViewFsCarbonFileTest.java b/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/ViewFsCarbonFileTest.java
index 3cb8198..ba661b1 100644
--- a/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/ViewFsCarbonFileTest.java
+++ b/core/src/test/java/org/apache/carbondata/core/datastore/filesystem/ViewFsCarbonFileTest.java
@@ -35,6 +35,7 @@ import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
+import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@@ -91,7 +92,7 @@ public class ViewFsCarbonFileTest {
@Test
public void testListFilesWithOutDirectoryPermission() {
viewFSCarbonFile = new ViewFSCarbonFile(fileStatusWithOutDirectoryPermission);
- assertTrue(viewFSCarbonFile.listFiles() == null);
+ assertArrayEquals(viewFSCarbonFile.listFiles(), new CarbonFile[0]);
}
@Test
[35/50] [abbrv] carbondata git commit: [CARBONDATA-1270]
Documentation update for Delete by ID and DATE syntax and example
Posted by ja...@apache.org.
[CARBONDATA-1270] Documentation update for Delete by ID and DATE syntax and example
This closes #1141
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/5af529e0
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/5af529e0
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/5af529e0
Branch: refs/heads/datamap
Commit: 5af529e0e0ab54fb592774bd4f6eae11d5000187
Parents: 05de7fd
Author: sgururajshetty <sg...@gmail.com>
Authored: Thu Jul 6 11:53:38 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Thu Jul 6 16:22:33 2017 +0530
----------------------------------------------------------------------
docs/dml-operation-on-carbondata.md | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/5af529e0/docs/dml-operation-on-carbondata.md
----------------------------------------------------------------------
diff --git a/docs/dml-operation-on-carbondata.md b/docs/dml-operation-on-carbondata.md
index 45f7464..4928098 100644
--- a/docs/dml-operation-on-carbondata.md
+++ b/docs/dml-operation-on-carbondata.md
@@ -262,14 +262,13 @@ Using this segment ID, you can remove the segment.
The following command will get the segmentID.
```
-SHOW SEGMENTS FOR Table dbname.tablename LIMIT number_of_segments
+SHOW SEGMENTS FOR TABLE [db_name.]table_name LIMIT number_of_segments
```
After you retrieve the segment ID of the segment that you want to delete, execute the following command to delete the selected segment.
```
-DELETE SEGMENT segment_sequence_id1, segments_sequence_id2, ....
-FROM TABLE tableName
+DELETE FROM TABLE [db_name.]table_name WHERE SEGMENT.ID IN (segment_id1, segment_id2, ....)
```
### Parameter Description
@@ -282,8 +281,8 @@ FROM TABLE tableName
### Example:
```
-DELETE SEGMENT 0 FROM TABLE CarbonDatabase.CarbonTable;
-DELETE SEGMENT 0.1,5,8 FROM TABLE CarbonDatabase.CarbonTable;
+DELETE FROM TABLE CarbonDatabase.CarbonTable WHERE SEGMENT.ID IN (0);
+DELETE FROM TABLE CarbonDatabase.CarbonTable WHERE SEGMENT.ID IN (0.1,5,8);
```
NOTE: Here 0.1 is the sequence id of a compacted segment.
@@ -293,8 +292,8 @@ This command will allow to delete the CarbonData segment(s) from the store based
The segment created before the particular date will be removed from the specific stores.
```
-DELETE SEGMENTS FROM TABLE [db_name.]table_name
-WHERE STARTTIME BEFORE DATE_VALUE
+DELETE FROM TABLE [db_name.]table_name
+WHERE SEGMENT.STARTTIME BEFORE DATE_VALUE
```
### Parameter Description
@@ -308,8 +307,8 @@ WHERE STARTTIME BEFORE DATE_VALUE
### Example:
```
- DELETE SEGMENTS FROM TABLE CarbonDatabase.CarbonTable
- WHERE STARTTIME BEFORE '2017-06-01 12:05:06';
+ DELETE FROM TABLE CarbonDatabase.CarbonTable
+ WHERE SEGMENT.STARTTIME BEFORE '2017-06-01 12:05:06';
```
## Update CarbonData Table
[42/50] [abbrv] carbondata git commit: [CARBONDATA-1229] acquired
meta.lock during table drop
Posted by ja...@apache.org.
[CARBONDATA-1229] acquired meta.lock during table drop
This closes #1153
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/403c3d9b
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/403c3d9b
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/403c3d9b
Branch: refs/heads/datamap
Commit: 403c3d9b41e166311ac45ec33b375cbecc8c4741
Parents: 619f1f9
Author: kunalkapoor <ku...@gmail.com>
Authored: Mon Jul 10 12:12:10 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Mon Jul 10 19:32:43 2017 +0530
----------------------------------------------------------------------
.../carbondata/core/locks/CarbonLockUtil.java | 24 +++++++++
.../execution/command/carbonTableSchema.scala | 52 +++++++++-----------
.../org/apache/spark/util/AlterTableUtil.scala | 25 +---------
3 files changed, 50 insertions(+), 51 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/403c3d9b/core/src/main/java/org/apache/carbondata/core/locks/CarbonLockUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/locks/CarbonLockUtil.java b/core/src/main/java/org/apache/carbondata/core/locks/CarbonLockUtil.java
index fba03a1..eaaaf94 100644
--- a/core/src/main/java/org/apache/carbondata/core/locks/CarbonLockUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/locks/CarbonLockUtil.java
@@ -19,6 +19,7 @@ package org.apache.carbondata.core.locks;
import org.apache.carbondata.common.logging.LogService;
import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
/**
* This class contains all carbon lock utilities
@@ -60,4 +61,27 @@ public class CarbonLockUtil {
}
}
}
+
+ /**
+ * Given a lock type this method will return a new lock object if the lock is not already
+ * acquired by any other operation
+ *
+ * @param carbonTable table whose lock is to be acquired
+ * @param lockType type of the lock, e.g. LockUsage.METADATA_LOCK
+ * @return the acquired lock
+ */
+ public static ICarbonLock getLockObject(CarbonTable carbonTable,
+ String lockType) {
+ ICarbonLock carbonLock = CarbonLockFactory
+ .getCarbonLockObj(carbonTable.getAbsoluteTableIdentifier().getCarbonTableIdentifier(),
+ lockType);
+ LOGGER.info("Trying to acquire lock: " + carbonLock);
+ if (carbonLock.lockWithRetries()) {
+ LOGGER.info("Successfully acquired the lock " + carbonLock);
+ } else {
+ throw new RuntimeException("Table is locked for updation. Please try after some time");
+ }
+ return carbonLock;
+ }
+
}
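A sketch of the acquire-all-then-release pattern this utility enables; the surrounding class and the elided drop-table work are hypothetical, but the lock-type constants and the getLockObject signature are taken from this diff.

import java.util.Arrays;
import java.util.List;

import org.apache.carbondata.core.locks.CarbonLockUtil;
import org.apache.carbondata.core.locks.ICarbonLock;
import org.apache.carbondata.core.locks.LockUsage;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;

public class DropTableLockingSketch {
  // getLockObject throws if any lock is already held, so the operation
  // either holds every lock it needs or performs no work at all.
  static void dropWithLocks(CarbonTable carbonTable) {
    List<String> lockTypes = Arrays.asList(LockUsage.METADATA_LOCK, LockUsage.DROP_TABLE_LOCK);
    ICarbonLock[] held = new ICarbonLock[lockTypes.size()];
    try {
      for (int i = 0; i < lockTypes.size(); i++) {
        held[i] = CarbonLockUtil.getLockObject(carbonTable, lockTypes.get(i));
      }
      // ... drop-table work would go here ...
    } finally {
      // Release whatever was actually acquired, even after a failure midway.
      for (ICarbonLock lock : held) {
        if (lock != null) {
          lock.unlock();
        }
      }
    }
  }
}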
http://git-wip-us.apache.org/repos/asf/carbondata/blob/403c3d9b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
index 8e7db45..2e5812c 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
@@ -17,9 +17,8 @@
package org.apache.spark.sql.execution.command
-import java.io.File
-
import scala.collection.JavaConverters._
+import scala.collection.mutable.ListBuffer
import scala.language.implicitConversions
import org.apache.commons.lang3.StringUtils
@@ -30,7 +29,7 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.hive.{CarbonMetastore, CarbonRelation, HiveExternalCatalog}
+import org.apache.spark.sql.hive.{CarbonMetastore, CarbonRelation}
import org.apache.spark.util.FileUtils
import org.codehaus.jackson.map.ObjectMapper
@@ -41,10 +40,10 @@ import org.apache.carbondata.core.cache.dictionary.ManageDictionaryAndBTree
import org.apache.carbondata.core.constants.{CarbonCommonConstants, CarbonLoadOptionConstants}
import org.apache.carbondata.core.datastore.impl.FileFactory
import org.apache.carbondata.core.dictionary.server.DictionaryServer
-import org.apache.carbondata.core.locks.{CarbonLockFactory, LockUsage}
+import org.apache.carbondata.core.locks.{CarbonLockFactory, CarbonLockUtil, ICarbonLock, LockUsage}
import org.apache.carbondata.core.metadata.CarbonTableIdentifier
import org.apache.carbondata.core.metadata.encoder.Encoding
-import org.apache.carbondata.core.metadata.schema.table.{CarbonTable, TableInfo}
+import org.apache.carbondata.core.metadata.schema.table.TableInfo
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension
import org.apache.carbondata.core.mutate.{CarbonUpdateUtil, TupleIdEnum}
import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
@@ -834,24 +833,17 @@ case class CarbonDropTableCommand(ifExistsSet: Boolean,
val dbName = getDB.getDatabaseName(databaseNameOp, sparkSession)
val identifier = TableIdentifier(tableName, Option(dbName))
val carbonTableIdentifier = new CarbonTableIdentifier(dbName, tableName, "")
- val carbonLock = CarbonLockFactory.getCarbonLockObj(carbonTableIdentifier.getDatabaseName,
- carbonTableIdentifier.getTableName + CarbonCommonConstants.UNDERSCORE +
- LockUsage.DROP_TABLE_LOCK)
+ val locksToBeAcquired = List(LockUsage.METADATA_LOCK, LockUsage.DROP_TABLE_LOCK)
val catalog = CarbonEnv.getInstance(sparkSession).carbonMetastore
val storePath = catalog.storePath
- var isLocked = false
catalog.checkSchemasModifiedTimeAndReloadTables()
+ val carbonLocks: scala.collection.mutable.ListBuffer[ICarbonLock] = ListBuffer()
try {
- isLocked = carbonLock.lockWithRetries()
- if (isLocked) {
- logInfo("Successfully able to get the lock for drop.")
- }
- else {
- LOGGER.audit(s"Dropping table $dbName.$tableName failed as the Table is locked")
- sys.error("Table is locked for deletion. Please try after some time")
+ val carbonTable = catalog.getTableFromMetadata(dbName, tableName).map(_.carbonTable).orNull
+ locksToBeAcquired foreach {
+ lock => carbonLocks += CarbonLockUtil.getLockObject(carbonTable, lock)
}
LOGGER.audit(s"Deleting table [$tableName] under database [$dbName]")
- val carbonTable = catalog.getTableFromMetadata(dbName, tableName).map(_.carbonTable).orNull
if (null != carbonTable) {
// clear driver B-tree and dictionary cache
ManageDictionaryAndBTree.clearBTreeAndDictionaryLRUCache(carbonTable)
@@ -859,18 +851,22 @@ case class CarbonDropTableCommand(ifExistsSet: Boolean,
CarbonEnv.getInstance(sparkSession).carbonMetastore
.dropTable(storePath, identifier)(sparkSession)
LOGGER.audit(s"Deleted table [$tableName] under database [$dbName]")
+ } catch {
+ case ex: Exception =>
+ LOGGER.error(ex, s"Dropping table $dbName.$tableName failed")
} finally {
- if (carbonLock != null && isLocked) {
- if (carbonLock.unlock()) {
- logInfo("Table MetaData Unlocked Successfully after dropping the table")
- // deleting any remaining files.
- val metadataFilePath = CarbonStorePath
- .getCarbonTablePath(storePath, carbonTableIdentifier).getMetadataDirectoryPath
- val fileType = FileFactory.getFileType(metadataFilePath)
- if (FileFactory.isFileExist(metadataFilePath, fileType)) {
- val file = FileFactory.getCarbonFile(metadataFilePath, fileType)
- CarbonUtil.deleteFoldersAndFiles(file.getParentFile)
- }
+ if (carbonLocks.nonEmpty) {
+ val unlocked = carbonLocks.forall(_.unlock())
+ if (unlocked) {
+ logInfo("Table MetaData Unlocked Successfully")
+ }
+ // deleting any remaining files.
+ val metadataFilePath = CarbonStorePath
+ .getCarbonTablePath(storePath, carbonTableIdentifier).getMetadataDirectoryPath
+ val fileType = FileFactory.getFileType(metadataFilePath)
+ if (FileFactory.isFileExist(metadataFilePath, fileType)) {
+ val file = FileFactory.getCarbonFile(metadataFilePath, fileType)
+ CarbonUtil.deleteFoldersAndFiles(file.getParentFile)
}
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/403c3d9b/integration/spark2/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/util/AlterTableUtil.scala b/integration/spark2/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
index 9e402cd..87717fb 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/util/AlterTableUtil.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.hive.HiveExternalCatalog._
import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.datastore.impl.FileFactory
-import org.apache.carbondata.core.locks.{CarbonLockFactory, ICarbonLock}
+import org.apache.carbondata.core.locks.{CarbonLockUtil, ICarbonLock}
import org.apache.carbondata.core.metadata.CarbonTableIdentifier
import org.apache.carbondata.core.metadata.schema.table.CarbonTable
import org.apache.carbondata.core.util.path.CarbonStorePath
@@ -65,7 +65,7 @@ object AlterTableUtil {
val acquiredLocks = ListBuffer[ICarbonLock]()
try {
locksToBeAcquired.foreach { lock =>
- acquiredLocks += getLockObject(table, lock)
+ acquiredLocks += CarbonLockUtil.getLockObject(table, lock)
}
acquiredLocks.toList
} catch {
@@ -76,27 +76,6 @@ object AlterTableUtil {
}
/**
- * Given a lock type this method will return a new lock object if not acquired by any other
- * operation
- *
- * @param carbonTable
- * @param lockType
- * @return
- */
- private def getLockObject(carbonTable: CarbonTable,
- lockType: String): ICarbonLock = {
- val carbonLock = CarbonLockFactory
- .getCarbonLockObj(carbonTable.getAbsoluteTableIdentifier.getCarbonTableIdentifier,
- lockType)
- if (carbonLock.lockWithRetries()) {
- LOGGER.info(s"Successfully acquired the lock $lockType")
- } else {
- sys.error("Table is locked for updation. Please try after some time")
- }
- carbonLock
- }
-
- /**
* This method will release the locks acquired for an operation
*
* @param locks
[36/50] [abbrv] carbondata git commit: [CARBONDATA-1266][PRESTO]
Fixed issue for non existing table
Posted by ja...@apache.org.
[CARBONDATA-1266][PRESTO] Fixed issue for non existing table
This closes #1137
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/92d1d970
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/92d1d970
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/92d1d970
Branch: refs/heads/datamap
Commit: 92d1d970a24f4dbd975316a35cf9b38a3ccdb111
Parents: 5af529e
Author: jatin <ja...@knoldus.in>
Authored: Wed Jul 5 17:34:19 2017 +0530
Committer: jackylk <ja...@huawei.com>
Committed: Fri Jul 7 08:37:19 2017 +0800
----------------------------------------------------------------------
.../java/org/apache/carbondata/presto/CarbondataMetadata.java | 6 ------
.../org/apache/carbondata/presto/impl/CarbonTableReader.java | 3 ++-
2 files changed, 2 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92d1d970/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataMetadata.java
----------------------------------------------------------------------
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataMetadata.java b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataMetadata.java
index 7701490..f2d19cf 100755
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataMetadata.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbondataMetadata.java
@@ -115,9 +115,6 @@ public class CarbondataMetadata implements ConnectorMetadata {
}
CarbonTable carbonTable = carbonTableReader.getTable(schemaTableName);
- if (carbonTable == null) {
- return null;
- }
List<ColumnMetadata> columnsMetaList = new LinkedList<>();
List<CarbonColumn> carbonColumns = carbonTable.getCreateOrderColumn(schemaTableName.getTableName());
@@ -148,9 +145,6 @@ public class CarbondataMetadata implements ConnectorMetadata {
//CarbonTable(official struct) is stored in CarbonMetadata(official struct)
CarbonTable cb = carbonTableReader.getTable(handle.getSchemaTableName());
- if (cb == null) {
- throw new TableNotFoundException(handle.getSchemaTableName());
- }
ImmutableMap.Builder<String, ColumnHandle> columnHandles = ImmutableMap.builder();
String tableName = handle.getSchemaTableName().getTableName();
http://git-wip-us.apache.org/repos/asf/carbondata/blob/92d1d970/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
----------------------------------------------------------------------
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
index 54832f5..e4d3ba5 100755
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
@@ -65,6 +65,7 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import static java.util.Objects.requireNonNull;
+import com.facebook.presto.spi.TableNotFoundException;
/** CarbonTableReader will be a facade of these utils
*
@@ -251,7 +252,7 @@ public class CarbonTableReader {
return parseCarbonMetadata(table);
}
- return null;
+ throw new TableNotFoundException(schemaTableName);
}
/**
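Callers that still want a lookup where a missing table is not an error now have to opt back into the old contract explicitly. A minimal sketch, assuming only the getTable signature shown above; tryGetTable and its wrapper class are hypothetical.

import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.TableNotFoundException;

import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.presto.impl.CarbonTableReader;

public class TableLookupSketch {
  // getTable now throws instead of returning null, which lets the two
  // call sites in CarbondataMetadata drop their null checks entirely.
  static CarbonTable tryGetTable(CarbonTableReader reader, SchemaTableName name) {
    try {
      return reader.getTable(name);
    } catch (TableNotFoundException e) {
      return null; // caller deliberately restores the old null contract
    }
  }
}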
[33/50] [abbrv] carbondata git commit: [CARBONDATA-1259] CompareTest
improvement This closes #1129
Posted by ja...@apache.org.
[CARBONDATA-1259] CompareTest improvement This closes #1129
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/26d2f1c8
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/26d2f1c8
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/26d2f1c8
Branch: refs/heads/datamap
Commit: 26d2f1c83c64a677221012319d1ec86aee429103
Parents: d9c3b48 327b307
Author: chenliang613 <ch...@apache.org>
Authored: Wed Jul 5 21:35:45 2017 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Wed Jul 5 21:35:45 2017 +0800
----------------------------------------------------------------------
.../carbondata/examples/CompareTest.scala | 103 ++++++++++++-------
1 file changed, 67 insertions(+), 36 deletions(-)
----------------------------------------------------------------------
[46/50] [abbrv] carbondata git commit: [CARBONDATA-1283] Carbon
should continue with the default value for a wrong value in a configured property
Posted by ja...@apache.org.
[CARBONDATA-1283] Carbon should continue with the default value for a wrong value in a configured property
This closes #1155
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1a35cfb9
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1a35cfb9
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1a35cfb9
Branch: refs/heads/datamap
Commit: 1a35cfb90d0f4a4da05ec80f7a5c192f6832b36d
Parents: cbe1419
Author: mohammadshahidkhan <mo...@gmail.com>
Authored: Mon Jul 10 17:47:16 2017 +0530
Committer: Venkata Ramana G <ra...@huawei.com>
Committed: Wed Jul 12 19:28:44 2017 +0530
----------------------------------------------------------------------
.../core/constants/CarbonCommonConstants.java | 14 ++
.../core/datastore/impl/FileFactory.java | 7 +-
.../carbondata/core/locks/HdfsFileLock.java | 5 +-
.../carbondata/core/util/CarbonProperties.java | 130 ++++++++++++++-
.../apache/carbondata/core/util/CarbonUtil.java | 13 +-
.../core/CarbonPropertiesValidationTest.java | 164 +++++++++++++++++++
6 files changed, 314 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1a35cfb9/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
index 8110abb..ccb6344 100644
--- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
+++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java
@@ -143,6 +143,11 @@ public final class CarbonCommonConstants {
* VIEWFSURL_PREFIX
*/
public static final String VIEWFSURL_PREFIX = "viewfs://";
+
+ /**
+ * ALLUXIO_PREFIX
+ */
+ public static final String ALLUXIOURL_PREFIX = "alluxio://";
/**
* FS_DEFAULT_FS
*/
@@ -329,6 +334,15 @@ public final class CarbonCommonConstants {
*/
public static final String CSV_READ_BUFFER_SIZE_DEFAULT = "50000";
/**
+ * min value for csv read buffer size
+ */
public static final int CSV_READ_BUFFER_SIZE_MIN = 10240; // 10 KB
+ /**
+ * max value for csv read buffer size
+ */
public static final int CSV_READ_BUFFER_SIZE_MAX = 10485760; // 10 MB
+
+ /**
* CSV_READ_COPIES
*/
public static final String DEFAULT_NUMBER_CORES = "2";
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1a35cfb9/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java b/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java
index 2a35ab3..2794470 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java
@@ -28,6 +28,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.FileHolder;
import org.apache.carbondata.core.datastore.filesystem.*;
import org.apache.carbondata.core.util.CarbonUtil;
@@ -74,13 +75,13 @@ public final class FileFactory {
}
public static FileType getFileType(String path) {
- if (path.startsWith(CarbonUtil.HDFS_PREFIX)) {
+ if (path.startsWith(CarbonCommonConstants.HDFSURL_PREFIX)) {
return FileType.HDFS;
}
- else if (path.startsWith(CarbonUtil.ALLUXIO_PREFIX)) {
+ else if (path.startsWith(CarbonCommonConstants.ALLUXIOURL_PREFIX)) {
return FileType.ALLUXIO;
}
- else if (path.startsWith(CarbonUtil.VIEWFS_PREFIX)) {
+ else if (path.startsWith(CarbonCommonConstants.VIEWFSURL_PREFIX)) {
return FileType.VIEWFS;
}
return FileType.LOCAL;
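The prefix-to-type mapping itself is unchanged; only the constants moved from CarbonUtil to CarbonCommonConstants. A short usage sketch, assuming FileFactory.getFileType as shown in this hunk (the paths are illustrative):

import org.apache.carbondata.core.datastore.impl.FileFactory;

public class FileTypeSketch {
  public static void main(String[] args) {
    System.out.println(FileFactory.getFileType("hdfs://ha/store"));    // HDFS
    System.out.println(FileFactory.getFileType("alluxio://a/store"));  // ALLUXIO
    System.out.println(FileFactory.getFileType("viewfs://v/store"));   // VIEWFS
    System.out.println(FileFactory.getFileType("/tmp/store"));         // LOCAL (the fallback)
  }
}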
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1a35cfb9/core/src/main/java/org/apache/carbondata/core/locks/HdfsFileLock.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/locks/HdfsFileLock.java b/core/src/main/java/org/apache/carbondata/core/locks/HdfsFileLock.java
index 94e7307..326f8ae 100644
--- a/core/src/main/java/org/apache/carbondata/core/locks/HdfsFileLock.java
+++ b/core/src/main/java/org/apache/carbondata/core/locks/HdfsFileLock.java
@@ -54,8 +54,9 @@ public class HdfsFileLock extends AbstractCarbonLock {
// If the STORE_LOCATION cannot be obtained, then use hadoop.tmp.dir.
tmpPath = CarbonProperties.getInstance().getProperty(CarbonCommonConstants.STORE_LOCATION,
System.getProperty(CarbonCommonConstants.HDFS_TEMP_LOCATION));
- if (!tmpPath.startsWith(CarbonCommonConstants.HDFSURL_PREFIX)
- && !tmpPath.startsWith(CarbonCommonConstants.VIEWFSURL_PREFIX)) {
+ if (!tmpPath.startsWith(CarbonCommonConstants.HDFSURL_PREFIX) && !tmpPath
+ .startsWith(CarbonCommonConstants.VIEWFSURL_PREFIX) && !tmpPath
+ .startsWith(CarbonCommonConstants.ALLUXIOURL_PREFIX)) {
tmpPath = hdfsPath + tmpPath;
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1a35cfb9/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
index c1e70ff..c9dd1ec 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java
@@ -33,6 +33,8 @@ import org.apache.carbondata.core.constants.CarbonLoadOptionConstants;
import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants;
import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
+import org.apache.hadoop.conf.Configuration;
+
public final class CarbonProperties {
/**
* Attribute for Carbon LOGGER.
@@ -98,6 +100,124 @@ public final class CarbonProperties {
validateBlockletGroupSizeInMB();
validateNumberOfColumnPerIORead();
validateNumberOfRowsPerBlockletColumnPage();
+ validateEnableUnsafeSort();
+ validateCustomBlockDistribution();
+ validateEnableVectorReader();
+ validateLockType();
+ validateCarbonCSVReadBufferSizeByte();
+ }
+
+ private void validateCarbonCSVReadBufferSizeByte() {
+ String csvReadBufferSizeStr =
+ carbonProperties.getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE);
+ if (null != csvReadBufferSizeStr) {
+ try {
+ int bufferSize = Integer.parseInt(csvReadBufferSizeStr);
+ if (bufferSize < CarbonCommonConstants.CSV_READ_BUFFER_SIZE_MIN
+ || bufferSize > CarbonCommonConstants.CSV_READ_BUFFER_SIZE_MAX) {
+ LOGGER.warn("The value \"" + csvReadBufferSizeStr + "\" configured for key "
+ + CarbonCommonConstants.CSV_READ_BUFFER_SIZE
+ + "\" is not in range. Valid range is (byte) \""
+ + CarbonCommonConstants.CSV_READ_BUFFER_SIZE_MIN + " to \""
+ + CarbonCommonConstants.CSV_READ_BUFFER_SIZE_MAX + ". Using the default value \""
+ + CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT);
+ carbonProperties.setProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE,
+ CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT);
+ }
+ } catch (NumberFormatException nfe) {
+ LOGGER.warn("The value \"" + csvReadBufferSizeStr + "\" configured for key "
+ + CarbonCommonConstants.CSV_READ_BUFFER_SIZE
+ + "\" is invalid. Using the default value \""
+ + CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT);
+ carbonProperties.setProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE,
+ CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT);
+ }
+ }
+ }
+
+ private void validateLockType() {
+ String lockTypeConfigured = carbonProperties.getProperty(CarbonCommonConstants.LOCK_TYPE);
+ if (null != lockTypeConfigured) {
+ switch (lockTypeConfigured.toUpperCase()) {
+ // if the user sets the lock type to CARBON_LOCK_TYPE_ZOOKEEPER then no validation is needed;
+ // otherwise validate based on the file system type: for a LOCAL file system the lock will be
+ // CARBON_LOCK_TYPE_LOCAL, and for a distributed one CARBON_LOCK_TYPE_HDFS
+ case CarbonCommonConstants.CARBON_LOCK_TYPE_ZOOKEEPER:
+ break;
+ case CarbonCommonConstants.CARBON_LOCK_TYPE_LOCAL:
+ case CarbonCommonConstants.CARBON_LOCK_TYPE_HDFS:
+ default:
+ validateAndConfigureLockType(lockTypeConfigured);
+ }
+ } else {
+ validateAndConfigureLockType(lockTypeConfigured);
+ }
+ }
+
+ /**
+ * the method decide and set the lock type based on the configured system type
+ *
+ * @param lockTypeConfigured
+ */
+ private void validateAndConfigureLockType(String lockTypeConfigured) {
+ Configuration configuration = new Configuration(true);
+ String defaultFs = configuration.get("fs.defaultFS");
+ if (null != defaultFs && (defaultFs.startsWith(CarbonCommonConstants.HDFSURL_PREFIX)
+ || defaultFs.startsWith(CarbonCommonConstants.VIEWFSURL_PREFIX) || defaultFs
+ .startsWith(CarbonCommonConstants.ALLUXIOURL_PREFIX))
+ && !CarbonCommonConstants.CARBON_LOCK_TYPE_HDFS.equalsIgnoreCase(lockTypeConfigured)) {
+ LOGGER.warn("The value \"" + lockTypeConfigured + "\" configured for key "
+ + CarbonCommonConstants.LOCK_TYPE + "\" is invalid. Using the default value \""
+ + CarbonCommonConstants.CARBON_LOCK_TYPE_HDFS);
+ carbonProperties.setProperty(CarbonCommonConstants.LOCK_TYPE,
+ CarbonCommonConstants.CARBON_LOCK_TYPE_HDFS);
+ } else if (null != defaultFs && defaultFs.startsWith(CarbonCommonConstants.LOCAL_FILE_PREFIX)
+ && !CarbonCommonConstants.CARBON_LOCK_TYPE_LOCAL.equalsIgnoreCase(lockTypeConfigured)) {
+ carbonProperties.setProperty(CarbonCommonConstants.LOCK_TYPE,
+ CarbonCommonConstants.CARBON_LOCK_TYPE_LOCAL);
+ LOGGER.warn("The value \"" + lockTypeConfigured + "\" configured for key "
+ + CarbonCommonConstants.LOCK_TYPE
+ + "\" is invalid. Using the default value \""
+ + CarbonCommonConstants.CARBON_LOCK_TYPE_LOCAL);
+ }
+ }
+
+ private void validateEnableVectorReader() {
+ String vectorReaderStr =
+ carbonProperties.getProperty(CarbonCommonConstants.ENABLE_VECTOR_READER);
+ boolean isValidBooleanValue = CarbonUtil.validateBoolean(vectorReaderStr);
+ if (!isValidBooleanValue) {
+ LOGGER.warn("The enable vector reader value \"" + vectorReaderStr
+ + "\" is invalid. Using the default value \""
+ + CarbonCommonConstants.ENABLE_VECTOR_READER_DEFAULT + "\"");
+ carbonProperties.setProperty(CarbonCommonConstants.ENABLE_VECTOR_READER,
+ CarbonCommonConstants.ENABLE_VECTOR_READER_DEFAULT);
+ }
+ }
+
+ private void validateCustomBlockDistribution() {
+ String customBlockDistributionStr =
+ carbonProperties.getProperty(CarbonCommonConstants.CARBON_CUSTOM_BLOCK_DISTRIBUTION);
+ boolean isValidBooleanValue = CarbonUtil.validateBoolean(customBlockDistributionStr);
+ if (!isValidBooleanValue) {
+ LOGGER.warn("The custom block distribution value \"" + customBlockDistributionStr
+ + "\" is invalid. Using the default value \""
+ + CarbonCommonConstants.CARBON_CUSTOM_BLOCK_DISTRIBUTION_DEFAULT + "\"");
+ carbonProperties.setProperty(CarbonCommonConstants.CARBON_CUSTOM_BLOCK_DISTRIBUTION,
+ CarbonCommonConstants.CARBON_CUSTOM_BLOCK_DISTRIBUTION_DEFAULT);
+ }
+ }
+
+ private void validateEnableUnsafeSort() {
+ String unSafeSortStr = carbonProperties.getProperty(CarbonCommonConstants.ENABLE_UNSAFE_SORT);
+ boolean isValidBooleanValue = CarbonUtil.validateBoolean(unSafeSortStr);
+ if (!isValidBooleanValue) {
+ LOGGER.warn("The enable unsafe sort value \"" + unSafeSortStr
+ + "\" is invalid. Using the default value \""
+ + CarbonCommonConstants.ENABLE_UNSAFE_SORT_DEFAULT + "\"");
+ carbonProperties.setProperty(CarbonCommonConstants.ENABLE_UNSAFE_SORT,
+ CarbonCommonConstants.ENABLE_UNSAFE_SORT_DEFAULT);
+ }
}
private void initPropertySet() throws IllegalAccessException {
@@ -330,12 +450,10 @@ public final class CarbonProperties {
}
private void validateHighCardinalityIdentify() {
- String highcardIdentifyStr = carbonProperties
- .getProperty(CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE,
- CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE_DEFAULT);
- try {
- Boolean.parseBoolean(highcardIdentifyStr);
- } catch (NumberFormatException e) {
+ String highcardIdentifyStr =
+ carbonProperties.getProperty(CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE);
+ boolean validateBoolean = CarbonUtil.validateBoolean(highcardIdentifyStr);
+ if (!validateBoolean) {
LOGGER.info("The high cardinality identify value \"" + highcardIdentifyStr
+ "\" is invalid. Using the default value \""
+ CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE_DEFAULT);
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1a35cfb9/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index 06b2a61..1b08263 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -84,11 +84,6 @@ import org.apache.thrift.transport.TIOStreamTransport;
public final class CarbonUtil {
- public static final String HDFS_PREFIX = "hdfs://";
- public static final String VIEWFS_PREFIX = "viewfs://";
- public static final String ALLUXIO_PREFIX = "alluxio://";
- private static final String FS_DEFAULT_FS = "fs.defaultFS";
-
/**
* Attribute for Carbon LOGGER
*/
@@ -697,7 +692,7 @@ public final class CarbonUtil {
*/
public static String checkAndAppendHDFSUrl(String filePath) {
String currentPath = filePath;
- String defaultFsUrl = conf.get(FS_DEFAULT_FS);
+ String defaultFsUrl = conf.get(CarbonCommonConstants.FS_DEFAULT_FS);
String baseDFSUrl = CarbonProperties.getInstance()
.getProperty(CarbonCommonConstants.CARBON_DDL_BASE_HDFS_URL, "");
if (checkIfPrefixExists(filePath)) {
@@ -721,8 +716,10 @@ public final class CarbonUtil {
private static boolean checkIfPrefixExists(String path) {
final String lowerPath = path.toLowerCase();
- return lowerPath.startsWith(HDFS_PREFIX) || lowerPath.startsWith(VIEWFS_PREFIX) || lowerPath
- .startsWith("file://") || lowerPath.startsWith(ALLUXIO_PREFIX);
+ return lowerPath.startsWith(CarbonCommonConstants.HDFSURL_PREFIX) || lowerPath
+ .startsWith(CarbonCommonConstants.VIEWFSURL_PREFIX) || lowerPath
+ .startsWith(CarbonCommonConstants.LOCAL_FILE_PREFIX) || lowerPath
+ .startsWith(CarbonCommonConstants.ALLUXIOURL_PREFIX);
}
public static String getCarbonStorePath() {
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1a35cfb9/core/src/test/java/org/apache/carbondata/core/CarbonPropertiesValidationTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/carbondata/core/CarbonPropertiesValidationTest.java b/core/src/test/java/org/apache/carbondata/core/CarbonPropertiesValidationTest.java
new file mode 100644
index 0000000..e0262dc
--- /dev/null
+++ b/core/src/test/java/org/apache/carbondata/core/CarbonPropertiesValidationTest.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.core;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.util.CarbonProperties;
+import org.apache.carbondata.core.util.CarbonProperty;
+
+import junit.framework.TestCase;
+import org.junit.Test;
+
+/**
+ * Tests that validate the carbon common constant configurations.
+ */
+public class CarbonPropertiesValidationTest extends TestCase {
+
+ CarbonProperties carbonProperties;
+
+ @Override public void setUp() throws Exception {
+ carbonProperties = CarbonProperties.getInstance();
+ }
+
+ @Test public void testValidateLockType()
+ throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
+ Method validateMethodType = carbonProperties.getClass().getDeclaredMethod("validateLockType");
+ validateMethodType.setAccessible(true);
+ carbonProperties.addProperty(CarbonCommonConstants.LOCK_TYPE, "xyz");
+ String valueBeforeValidation = carbonProperties.getProperty(CarbonCommonConstants.LOCK_TYPE);
+ validateMethodType.invoke(carbonProperties);
+ String valueAfterValidation = carbonProperties.getProperty(CarbonCommonConstants.LOCK_TYPE);
+ assertTrue(!valueBeforeValidation.equals(valueAfterValidation));
+ assertTrue(CarbonCommonConstants.CARBON_LOCK_TYPE_LOCAL.equalsIgnoreCase(valueAfterValidation));
+ }
+
+ @Test public void testValidateEnableUnsafeSort()
+ throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
+ Method validateMethodType =
+ carbonProperties.getClass().getDeclaredMethod("validateEnableUnsafeSort");
+ validateMethodType.setAccessible(true);
+ carbonProperties.addProperty(CarbonCommonConstants.ENABLE_UNSAFE_SORT, "xyz");
+ String valueBeforeValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.ENABLE_UNSAFE_SORT);
+ validateMethodType.invoke(carbonProperties);
+ String valueAfterValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.ENABLE_UNSAFE_SORT);
+ assertTrue(!valueBeforeValidation.equals(valueAfterValidation));
+ assertTrue(
+ CarbonCommonConstants.ENABLE_UNSAFE_SORT_DEFAULT.equalsIgnoreCase(valueAfterValidation));
+ }
+
+ @Test public void testValidateCustomBlockDistribution()
+ throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
+ Method validateMethodType =
+ carbonProperties.getClass().getDeclaredMethod("validateCustomBlockDistribution");
+ validateMethodType.setAccessible(true);
+ carbonProperties.addProperty(CarbonCommonConstants.CARBON_CUSTOM_BLOCK_DISTRIBUTION, "xyz");
+ String valueBeforeValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.CARBON_CUSTOM_BLOCK_DISTRIBUTION);
+ validateMethodType.invoke(carbonProperties);
+ String valueAfterValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.CARBON_CUSTOM_BLOCK_DISTRIBUTION);
+ assertTrue(!valueBeforeValidation.equals(valueAfterValidation));
+ assertTrue(CarbonCommonConstants.CARBON_CUSTOM_BLOCK_DISTRIBUTION_DEFAULT
+ .equalsIgnoreCase(valueAfterValidation));
+ }
+
+ @Test public void testValidateEnableVectorReader()
+ throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
+ Method validateMethodType =
+ carbonProperties.getClass().getDeclaredMethod("validateEnableVectorReader");
+ validateMethodType.setAccessible(true);
+ carbonProperties.addProperty(CarbonCommonConstants.ENABLE_VECTOR_READER, "xyz");
+ String valueBeforeValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.ENABLE_VECTOR_READER);
+ validateMethodType.invoke(carbonProperties);
+ String valueAfterValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.ENABLE_VECTOR_READER);
+ assertTrue(!valueBeforeValidation.equals(valueAfterValidation));
+ assertTrue(
+ CarbonCommonConstants.ENABLE_VECTOR_READER_DEFAULT.equalsIgnoreCase(valueAfterValidation));
+ }
+
+ @Test public void testValidateCarbonCSVReadBufferSizeByte()
+ throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
+ Method validateMethodType =
+ carbonProperties.getClass().getDeclaredMethod("validateCarbonCSVReadBufferSizeByte");
+ validateMethodType.setAccessible(true);
+ carbonProperties.addProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE, "xyz");
+ String valueBeforeValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE);
+ validateMethodType.invoke(carbonProperties);
+ String valueAfterValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE);
+ assertTrue(!valueBeforeValidation.equals(valueAfterValidation));
+ assertTrue(
+ CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT.equalsIgnoreCase(valueAfterValidation));
+ }
+
+ @Test public void testValidateCarbonCSVReadBufferSizeByteRange()
+ throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
+ Method validateMethodType =
+ carbonProperties.getClass().getDeclaredMethod("validateCarbonCSVReadBufferSizeByte");
+ validateMethodType.setAccessible(true);
+ carbonProperties.addProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE, "10485761");
+ String valueBeforeValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE);
+ validateMethodType.invoke(carbonProperties);
+ String valueAfterValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE);
+ assertTrue(!valueBeforeValidation.equals(valueAfterValidation));
+ assertTrue(
+ CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT.equalsIgnoreCase(valueAfterValidation));
+ carbonProperties.addProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE, "10240");
+ valueBeforeValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE);
+ validateMethodType.invoke(carbonProperties);
+ valueAfterValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE);
+ assertTrue(valueBeforeValidation.equals(valueAfterValidation));
+ carbonProperties.addProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE, "10239");
+ valueBeforeValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE);
+ validateMethodType.invoke(carbonProperties);
+ valueAfterValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.CSV_READ_BUFFER_SIZE);
+ assertTrue(!valueBeforeValidation.equals(valueAfterValidation));
+ assertTrue(
+ CarbonCommonConstants.CSV_READ_BUFFER_SIZE_DEFAULT.equalsIgnoreCase(valueAfterValidation));
+ }
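The three probes above bracket the valid range: 10485761 is rejected (one byte above a 10485760 = 10 MB upper bound), 10240 (10 KB) is kept, and 10239 falls back to the default. A minimal sketch of that bounds check, assuming those limits and a hypothetical default value (the real validator is the private validateCarbonCSVReadBufferSizeByte method exercised above):

public class RangeValidationSketch {
  static final int MIN_BYTES = 10240;      // 10 KB, lower bound implied by the test
  static final int MAX_BYTES = 10485760;   // 10 MB, upper bound implied by the test
  static final String DEFAULT = "1048576"; // assumed default, for illustration only

  // Keep the value only if it parses and lies within [MIN_BYTES, MAX_BYTES];
  // otherwise fall back to the default, as the assertions above expect.
  static String validate(String value) {
    try {
      int size = Integer.parseInt(value);
      if (size >= MIN_BYTES && size <= MAX_BYTES) {
        return value;
      }
    } catch (NumberFormatException ignored) {
      // unparseable input also falls through to the default
    }
    return DEFAULT;
  }

  public static void main(String[] args) {
    System.out.println(validate("10485761")); // out of range -> default
    System.out.println(validate("10240"));    // in range -> kept
    System.out.println(validate("10239"));    // out of range -> default
  }
}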
+
+ @Test public void testValidateHighCardinalityIdentify()
+ throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
+ Method validateMethodType =
+ carbonProperties.getClass().getDeclaredMethod("validateHighCardinalityIdentify");
+ validateMethodType.setAccessible(true);
+ carbonProperties.addProperty(CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE, "xyz");
+ String valueBeforeValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE);
+ validateMethodType.invoke(carbonProperties);
+ String valueAfterValidation =
+ carbonProperties.getProperty(CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE);
+ assertTrue(!valueBeforeValidation.equals(valueAfterValidation));
+ assertTrue(CarbonCommonConstants.HIGH_CARDINALITY_IDENTIFY_ENABLE_DEFAULT
+ .equalsIgnoreCase(valueAfterValidation));
+ }
+}
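All of the tests above follow one pattern: set an invalid property value, invoke the private validator via reflection, and assert that the property was reset to its default. A standalone sketch of that pattern follows; Config and validateFlag are hypothetical stand-ins, not CarbonData APIs:

import java.lang.reflect.Method;

public class ReflectiveValidationSketch {
  static class Config {
    static final String DEFAULT_FLAG = "false";
    private String flag = DEFAULT_FLAG;
    void set(String v) { flag = v; }
    String get() { return flag; }
    // Private validator, mirroring the validateXxx methods exercised above:
    // an unparseable value falls back to the default.
    private void validateFlag() {
      if (!"true".equalsIgnoreCase(flag) && !"false".equalsIgnoreCase(flag)) {
        flag = DEFAULT_FLAG;
      }
    }
  }

  public static void main(String[] args) throws Exception {
    Config config = new Config();
    config.set("xyz"); // invalid value, as in the tests above
    Method validate = Config.class.getDeclaredMethod("validateFlag");
    validate.setAccessible(true); // required before invoking a private method
    validate.invoke(config);
    System.out.println(config.get()); // prints "false", the default
  }
}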
[40/50] [abbrv] carbondata git commit: [CARBONDATA-1267] Add short_int case branch in DeltaIntegerCodec
Posted by ja...@apache.org.
[CARBONDATA-1267] Add short_int case branch in DeltaIntegerCodec
This closes #1139
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c6bc1f07
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c6bc1f07
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c6bc1f07
Branch: refs/heads/datamap
Commit: c6bc1f07180c4740ba9b3e518ead34daa093be1c
Parents: 0558c28
Author: xuchuanyin <xu...@hust.edu.cn>
Authored: Wed Jul 5 21:00:45 2017 +0800
Committer: jackylk <ja...@huawei.com>
Committed: Mon Jul 10 21:19:45 2017 +0800
----------------------------------------------------------------------
.../page/encoding/DeltaIntegerCodec.java | 12 +++++++
.../test/resources/short_int_as_target_type.csv | 3 ++
.../aggquery/IntegerDataTypeTestCase.scala | 33 +++++++++++++++++++-
3 files changed, 47 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c6bc1f07/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DeltaIntegerCodec.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DeltaIntegerCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DeltaIntegerCodec.java
index b77f7a2..6cf59a6 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DeltaIntegerCodec.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DeltaIntegerCodec.java
@@ -120,6 +120,9 @@ public class DeltaIntegerCodec extends AdaptiveCompressionCodec {
case SHORT:
encodedPage.putShort(rowId, (short)(max - value));
break;
+ case SHORT_INT:
+ encodedPage.putShortInt(rowId, (int)(max - value));
+ break;
case INT:
encodedPage.putInt(rowId, (int)(max - value));
break;
@@ -137,6 +140,9 @@ public class DeltaIntegerCodec extends AdaptiveCompressionCodec {
case SHORT:
encodedPage.putShort(rowId, (short)(max - value));
break;
+ case SHORT_INT:
+ encodedPage.putShortInt(rowId, (int)(max - value));
+ break;
case INT:
encodedPage.putInt(rowId, (int)(max - value));
break;
@@ -157,6 +163,9 @@ public class DeltaIntegerCodec extends AdaptiveCompressionCodec {
case SHORT:
encodedPage.putShort(rowId, (short)(max - value));
break;
+ case SHORT_INT:
+ encodedPage.putShortInt(rowId, (int)(max - value));
+ break;
case INT:
encodedPage.putInt(rowId, (int)(max - value));
break;
@@ -177,6 +186,9 @@ public class DeltaIntegerCodec extends AdaptiveCompressionCodec {
case SHORT:
encodedPage.putShort(rowId, (short)(max - value));
break;
+ case SHORT_INT:
+ encodedPage.putShortInt(rowId, (int)(max - value));
+ break;
case INT:
encodedPage.putInt(rowId, (int)(max - value));
break;
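SHORT_INT is CarbonData's 3-byte integer type, sitting between the 2-byte SHORT and the 4-byte INT. The diff calls ColumnPage.putShortInt, whose implementation is not shown here; the sketch below only illustrates, under that assumption, how a value in the 3-byte range (±8388607) can be packed into and recovered from 3 bytes:

public class ShortIntPackingSketch {
  // Pack the low 24 bits of a value into 3 bytes (little-endian).
  static void putShortInt(byte[] page, int offset, int value) {
    page[offset] = (byte) (value & 0xFF);
    page[offset + 1] = (byte) ((value >> 8) & 0xFF);
    page[offset + 2] = (byte) ((value >> 16) & 0xFF);
  }

  // Read 3 bytes back and sign-extend from bit 23 to a full int.
  static int getShortInt(byte[] page, int offset) {
    int v = (page[offset] & 0xFF)
        | ((page[offset + 1] & 0xFF) << 8)
        | ((page[offset + 2] & 0xFF) << 16);
    return (v << 8) >> 8; // arithmetic shift restores the sign bit
  }

  public static void main(String[] args) {
    byte[] page = new byte[3];
    putShortInt(page, 0, -47257);
    System.out.println(getShortInt(page, 0)); // -47257
  }
}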
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c6bc1f07/integration/spark-common-test/src/test/resources/short_int_as_target_type.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/short_int_as_target_type.csv b/integration/spark-common-test/src/test/resources/short_int_as_target_type.csv
new file mode 100644
index 0000000..964517e
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/short_int_as_target_type.csv
@@ -0,0 +1,3 @@
+begin_time,name
+1497376581,name1
+1497423838,name2
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c6bc1f07/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/aggquery/IntegerDataTypeTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/aggquery/IntegerDataTypeTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/aggquery/IntegerDataTypeTestCase.scala
index 8e19684..f918301 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/aggquery/IntegerDataTypeTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/aggquery/IntegerDataTypeTestCase.scala
@@ -112,6 +112,37 @@ class IntegerDataTypeTestCase extends QueryTest with BeforeAndAfterAll {
""".stripMargin)
}
+ test("short int as target type in deltaIntegerCodec") {
+ sql(
+ """
+ | DROP TABLE IF EXISTS short_int_target_table
+ """.stripMargin)
+
+ // begin_time will be encoded by DeltaIntegerCodec; max - min = 1497423838 - 1497376581 = 47257
+ // exceeds the 2-byte short range (32767) but fits in 3 bytes, so SHORT_INT is the target type
+ sql(
+ """
+ | CREATE TABLE short_int_target_table
+ | (begin_time bigint, name string)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+
+ sql(
+ s"""
+ | LOAD DATA LOCAL INPATH '$resourcesPath/short_int_as_target_type.csv'
+ | INTO TABLE short_int_target_table
+ """.stripMargin)
+
+ checkAnswer(
+ sql("select begin_time from short_int_target_table"),
+ Seq(Row(1497376581), Row(1497423838))
+ )
+
+ sql(
+ """
+ | DROP TABLE short_int_target_table
+ """.stripMargin)
+ }
+
override def afterAll {
sql("drop table if exists integertypetableAgg")
CarbonProperties.getInstance().addProperty(
@@ -119,4 +150,4 @@ class IntegerDataTypeTestCase extends QueryTest with BeforeAndAfterAll {
CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE_LOADING_DEFAULT
)
}
-}
\ No newline at end of file
+}
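The CSV values make the codec's choice checkable by hand: max - min = 1497423838 - 1497376581 = 47257, too large for a 2-byte short (upper bound 32767) but well within 3 bytes. A simplified sketch of the target-type selection, assuming the codec picks the smallest type that can hold the largest delta (the real logic lives in DeltaIntegerCodec and covers more cases):

public class DeltaTargetTypeSketch {
  enum TargetType { BYTE, SHORT, SHORT_INT, INT, LONG }

  // Choose the smallest type that can hold every stored delta (max - value).
  static TargetType chooseTargetType(long min, long max) {
    long delta = max - min; // largest delta the page must store
    if (delta <= Byte.MAX_VALUE) return TargetType.BYTE;
    if (delta <= Short.MAX_VALUE) return TargetType.SHORT;
    if (delta <= 8388607) return TargetType.SHORT_INT; // 2^23 - 1, 3 bytes
    if (delta <= Integer.MAX_VALUE) return TargetType.INT;
    return TargetType.LONG;
  }

  public static void main(String[] args) {
    // Values from short_int_as_target_type.csv
    System.out.println(chooseTargetType(1497376581L, 1497423838L)); // SHORT_INT
  }
}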
[21/50] [abbrv] carbondata git commit: Fixed describe formatted for sort_columns after alter
Posted by ja...@apache.org.
Fixed describe formatted for sort_columns after alter
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/e9329ee7
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/e9329ee7
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/e9329ee7
Branch: refs/heads/datamap
Commit: e9329ee7c1adc913d6e65c970e6312a5b18c6ec2
Parents: 5f9741e
Author: Ayush Mantri <aa...@gmail.com>
Authored: Thu Jun 29 15:18:20 2017 +0530
Committer: kumarvishal <ku...@gmail.com>
Committed: Mon Jul 3 20:30:18 2017 +0530
----------------------------------------------------------------------
.../core/metadata/schema/table/CarbonTable.java | 19 +++++++++++++++++++
.../execution/command/carbonTableSchema.scala | 6 +++---
.../execution/command/carbonTableSchema.scala | 6 +++---
.../AlterTableValidationTestCase.scala | 19 ++++++++++++++++++-
4 files changed, 43 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e9329ee7/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
index ae97262..16ded57 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
@@ -691,6 +691,25 @@ public class CarbonTable implements Serializable {
tableMeasuresMap.put(tableName, visibleMeasures);
}
+ /**
+ * Returns the names of the sort columns of the given table.
+ *
+ * @param tableName table name
+ * @return list of sort column names
+ */
+ public List<String> getSortColumns(String tableName) {
+ List<String> sortColumnsList = new ArrayList<String>(allDimensions.size());
+ List<CarbonDimension> carbonDimensions = tableDimensionsMap.get(tableName);
+ for (CarbonDimension dim : carbonDimensions) {
+ if (dim.isSortColumn()) {
+ sortColumnsList.add(dim.getColName());
+ }
+ }
+ return sortColumnsList;
+ }
+
public int getNumberOfSortColumns() {
return numberOfSortColumns;
}
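This helper exists because the position-based derivation used by DescribeCommandFormatted below breaks after ALTER TABLE: once columns are dropped and re-added, sort columns are no longer guaranteed to be the first getNumberOfSortColumns() entries of getAllDimensions(). A simplified contrast of the two approaches (Dimension is a hypothetical stand-in for CarbonDimension):

import java.util.ArrayList;
import java.util.List;

public class SortColumnsSketch {
  record Dimension(String name, boolean sortColumn) {}

  // Old approach: position-based, wrong once ALTER reorders dimensions.
  static List<String> firstN(List<Dimension> dims, int n) {
    List<String> out = new ArrayList<>();
    for (Dimension d : dims.subList(0, n)) out.add(d.name());
    return out;
  }

  // New approach: flag-based, robust to reordering.
  static List<String> byFlag(List<Dimension> dims) {
    List<String> out = new ArrayList<>();
    for (Dimension d : dims) if (d.sortColumn()) out.add(d.name());
    return out;
  }

  public static void main(String[] args) {
    // After "drop designation; add designation12", a new non-sort column
    // can sit among the original sort columns.
    List<Dimension> dims = List.of(
        new Dimension("empno", true),
        new Dimension("designation12", false),
        new Dimension("empname", true));
    System.out.println(firstN(dims, 2)); // [empno, designation12] -- wrong
    System.out.println(byFlag(dims));    // [empno, empname] -- correct
  }
}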
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e9329ee7/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
index 3477abb..70c8407 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
@@ -851,9 +851,9 @@ private[sql] case class DescribeCommandFormatted(
} else {
results ++= Seq(("ADAPTIVE", "", ""))
}
- results ++= Seq(("SORT_COLUMNS", relation.metaData.carbonTable.getAllDimensions
- .subList(0, relation.metaData.carbonTable.getNumberOfSortColumns).asScala
- .map(column => column.getColName).mkString(","), ""))
+ results ++= Seq(("SORT_COLUMNS", relation.metaData.carbonTable.getSortColumns(
+ relation.tableMeta.carbonTableIdentifier.getTableName).asScala
+ .map(column => column).mkString(","), ""))
val dimension = carbonTable
.getDimensionByTableName(relation.tableMeta.carbonTableIdentifier.getTableName)
results ++= getColumnGroups(dimension.asScala.toList)
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e9329ee7/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
index 8fe4bd7..bb5bdd1 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
@@ -902,9 +902,9 @@ private[sql] case class DescribeCommandFormatted(
} else {
results ++= Seq(("ADAPTIVE", "", ""))
}
- results ++= Seq(("SORT_COLUMNS", relation.metaData.carbonTable.getAllDimensions
- .subList(0, relation.metaData.carbonTable.getNumberOfSortColumns).asScala
- .map(column => column.getColName).mkString(","), ""))
+ results ++= Seq(("SORT_COLUMNS", relation.metaData.carbonTable.getSortColumns(
+ relation.tableMeta.carbonTableIdentifier.getTableName).asScala
+ .map(column => column).mkString(","), ""))
val dimension = carbonTable
.getDimensionByTableName(relation.tableMeta.carbonTableIdentifier.getTableName)
results ++= getColumnGroups(dimension.asScala.toList)
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e9329ee7/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/AlterTableValidationTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/AlterTableValidationTestCase.scala b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/AlterTableValidationTestCase.scala
index 972b0c2..ae64498 100644
--- a/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/AlterTableValidationTestCase.scala
+++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/restructure/AlterTableValidationTestCase.scala
@@ -423,7 +423,22 @@ class AlterTableValidationTestCase extends QueryTest with BeforeAndAfterAll {
sql("alter table Default.uniqdata rename to uniqdata1")
checkAnswer(sql("select * from Default.uniqdata1"), Row(1,"hello"))
}
-
+ test("describe formatted for default sort_columns pre and post alter") {
+ sql("CREATE TABLE defaultSortColumnsWithAlter (empno int, empname String, designation String,role String, doj Timestamp) STORED BY 'org.apache.carbondata.format' " +
+ "tblproperties('DICTIONARY_INCLUDE'='empno','DICTIONARY_EXCLUDE'='role')")
+ sql("alter table defaultSortColumnsWithAlter drop columns (designation)")
+ sql("alter table defaultSortColumnsWithAlter add columns (designation12 String)")
+ checkExistence(sql("describe formatted defaultSortColumnsWithAlter"),true,"SORT_COLUMNS")
+ checkExistence(sql("describe formatted defaultSortColumnsWithAlter"),true,"empno,empname,role,doj")
+ }
+ test("describe formatted for specified sort_columns pre and post alter") {
+ sql("CREATE TABLE specifiedSortColumnsWithAlter (empno int, empname String, designation String,role String, doj Timestamp) STORED BY 'org.apache.carbondata.format' " +
+ "tblproperties('sort_columns'='empno,empname,designation,role,doj','DICTIONARY_INCLUDE'='empno','DICTIONARY_EXCLUDE'='role')")
+ sql("alter table specifiedSortColumnsWithAlter drop columns (designation)")
+ sql("alter table specifiedSortColumnsWithAlter add columns (designation12 String)")
+ checkExistence(sql("describe formatted specifiedSortColumnsWithAlter"),true,"SORT_COLUMNS")
+ checkExistence(sql("describe formatted specifiedSortColumnsWithAlter"),true,"empno,empname,role,doj")
+ }
override def afterAll {
sql("DROP TABLE IF EXISTS restructure")
sql("DROP TABLE IF EXISTS restructure_new")
@@ -432,5 +447,7 @@ class AlterTableValidationTestCase extends QueryTest with BeforeAndAfterAll {
sql("DROP TABLE IF EXISTS restructure_badnew")
sql("DROP TABLE IF EXISTS lock_rename")
sql("drop table if exists uniqdata")
+ sql("drop table if exists defaultSortColumnsWithAlter")
+ sql("drop table if exists specifiedSortColumnsWithAlter")
}
}
[16/50] [abbrv] carbondata git commit: Remove the fixed spark.version in submodules to support new Spark versions. This closes #1017
Posted by ja...@apache.org.
Remove the fixed spark.version in submodules to support new Spark versions. This closes #1017
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/924f0b7a
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/924f0b7a
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/924f0b7a
Branch: refs/heads/datamap
Commit: 924f0b7a77fb1b5833ebec5b55c31b39fc908148
Parents: 92ba101 d3975a9
Author: chenliang613 <ch...@apache.org>
Authored: Sat Jul 1 22:46:16 2017 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Sat Jul 1 22:46:16 2017 +0800
----------------------------------------------------------------------
examples/spark2/pom.xml | 3 ---
integration/spark2/pom.xml | 3 ---
2 files changed, 6 deletions(-)
----------------------------------------------------------------------