You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ch...@apache.org on 2016/11/26 02:29:41 UTC
[1/2] incubator-carbondata git commit: remove unnecessary projection
Repository: incubator-carbondata
Updated Branches:
refs/heads/master e05c0d5da -> 739afbd71
remove unnecessary projection
fix
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/51b23544
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/51b23544
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/51b23544
Branch: refs/heads/master
Commit: 51b235447e5d40202a0588d1da1b171d72d7bd33
Parents: e05c0d5
Author: jackylk <ja...@huawei.com>
Authored: Sat Nov 26 09:48:14 2016 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Sat Nov 26 10:28:15 2016 +0800
----------------------------------------------------------------------
.../spark/sql/hive/CarbonStrategies.scala | 48 +++++++++++++-------
.../AllDataTypesTestCaseAggregate.scala | 4 +-
.../AllDataTypesTestCaseAggregate.scala | 5 ++
.../filterexpr/FilterProcessorTestCase.scala | 3 +-
4 files changed, 38 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/51b23544/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
index a755d5d..1e944f7 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
@@ -22,9 +22,10 @@ import java.util
import scala.collection.JavaConverters._
import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.{expressions, TableIdentifier}
import org.apache.spark.sql.catalyst.CarbonTableIdentifierImplicit._
+import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
+import org.apache.spark.sql.catalyst.expressions
import org.apache.spark.sql.catalyst.expressions.{AttributeSet, _}
import org.apache.spark.sql.catalyst.planning.{PhysicalOperation, QueryPlanner}
import org.apache.spark.sql.catalyst.plans.logical.{Filter => LogicalFilter, LogicalPlan}
@@ -39,7 +40,6 @@ import org.apache.spark.sql.types.IntegerType
import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.spark.exception.MalformedCarbonCommandException
-
class CarbonStrategies(sqlContext: SQLContext) extends QueryPlanner[SparkPlan] {
override def strategies: Seq[Strategy] = getStrategies
@@ -88,35 +88,49 @@ class CarbonStrategies(sqlContext: SQLContext) extends QueryPlanner[SparkPlan] {
relation.carbonRelation.metaData.carbonTable.getFactTableName.toLowerCase
// Check out any expressions are there in project list. if they are present then we need to
// decode them as well.
+
val projectSet = AttributeSet(projectList.flatMap(_.references))
- val scan = CarbonScan(projectSet.toSeq,
- relation.carbonRelation,
- predicates)(sqlContext)
+ val filterSet = AttributeSet(predicates.flatMap(_.references))
+
+ val scan = CarbonScan(projectSet.toSeq, relation.carbonRelation, predicates)(sqlContext)
projectList.map {
case attr: AttributeReference =>
case Alias(attr: AttributeReference, _) =>
case others =>
- others.references.map{f =>
+ others.references.map { f =>
val dictionary = relation.carbonRelation.metaData.dictionaryMap.get(f.name)
if (dictionary.isDefined && dictionary.get) {
scan.attributesNeedToDecode.add(f.asInstanceOf[AttributeReference])
}
}
}
- if (scan.attributesNeedToDecode.size() > 0) {
- val decoder = getCarbonDecoder(logicalRelation,
- sc,
- tableName,
- scan.attributesNeedToDecode.asScala.toSeq,
- scan)
- if (scan.unprocessedExprs.nonEmpty) {
- val filterCondToAdd = scan.unprocessedExprs.reduceLeftOption(expressions.And)
- Project(projectList, filterCondToAdd.map(Filter(_, decoder)).getOrElse(decoder))
+ val scanWithDecoder =
+ if (scan.attributesNeedToDecode.size() > 0) {
+ val decoder = getCarbonDecoder(logicalRelation,
+ sc,
+ tableName,
+ scan.attributesNeedToDecode.asScala.toSeq,
+ scan)
+ if (scan.unprocessedExprs.nonEmpty) {
+ val filterCondToAdd = scan.unprocessedExprs.reduceLeftOption(expressions.And)
+ filterCondToAdd.map(Filter(_, decoder)).getOrElse(decoder)
+ } else {
+ decoder
+ }
} else {
- Project(projectList, decoder)
+ scan
}
+
+ if (projectList.map(_.toAttribute) == scan.attributesRaw &&
+ projectSet.size == projectList.size &&
+ filterSet.subsetOf(projectSet)) {
+ // copied from spark pruneFilterProjectRaw
+ // When it is possible to just use column pruning to get the right projection and
+ // when the columns of this projection are enough to evaluate all filter conditions,
+ // just do a scan with no extra project.
+ scanWithDecoder
} else {
- Project(projectList, scan)
+ Project(projectList, scanWithDecoder)
}
}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/51b23544/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/aggquery/AllDataTypesTestCaseAggregate.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/aggquery/AllDataTypesTestCaseAggregate.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/aggquery/AllDataTypesTestCaseAggregate.scala
index 94aa63c..f8b126e 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/aggquery/AllDataTypesTestCaseAggregate.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/aggquery/AllDataTypesTestCaseAggregate.scala
@@ -19,7 +19,6 @@
package org.apache.carbondata.spark.testsuite.aggquery
-import org.apache.spark.sql.Row
import org.apache.spark.sql.common.util.CarbonHiveContext._
import org.apache.spark.sql.common.util.QueryTest
import org.scalatest.BeforeAndAfterAll
@@ -37,6 +36,7 @@ class AllDataTypesTestCaseAggregate extends QueryTest with BeforeAndAfterAll {
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-yyyy")
sql("DROP TABLE IF EXISTS alldatatypestableAGG")
+ sql("DROP TABLE IF EXISTS alldatatypescubeAGG_hive")
sql(
"CREATE TABLE alldatatypestableAGG (empno int, empname String, designation String, doj " +
"Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, deptname " +
@@ -53,8 +53,6 @@ class AllDataTypesTestCaseAggregate extends QueryTest with BeforeAndAfterAll {
"int,utilization int,salary int)row format delimited fields terminated by ','")
sql(
"LOAD DATA LOCAL INPATH './src/test/resources/datawithoutheader.csv' INTO TABLE alldatatypescubeAGG_hive")
-
-
}
test(
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/51b23544/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/AllDataTypesTestCaseAggregate.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/AllDataTypesTestCaseAggregate.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/AllDataTypesTestCaseAggregate.scala
index f02d4e7..c6a1405 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/AllDataTypesTestCaseAggregate.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/AllDataTypesTestCaseAggregate.scala
@@ -38,6 +38,11 @@ class AllDataTypesTestCaseAggregate extends QueryTest with BeforeAndAfterAll {
clean
val currentDirectory = new File(this.getClass.getResource("/").getPath + "/../../")
.getCanonicalPath
+
+ sql("drop table if exists Carbon_automation_test")
+ sql("drop table if exists Carbon_automation_hive")
+ sql("drop table if exists Carbon_automation_test_hive")
+
sql("create table if not exists Carbon_automation_test (imei string,deviceInformationId int,MAC string,deviceColor string,device_backColor string,modelId string,marketName string,AMSize string,ROMSize string,CUPAudit string,CPIClocked string,series string,productionDate timestamp,bomCode string,internalModels string, deliveryTime string, channelsId string, channelsName string , deliveryAreaId string, deliveryCountry string, deliveryProvince string, deliveryCity string,deliveryDistrict string, deliveryStreet string, oxSingleNumber string, ActiveCheckTime string, ActiveAreaId string, ActiveCountry string, ActiveProvince string, Activecity string, ActiveDistrict string, ActiveStreet string, ActiveOperatorId string, Active_releaseId string, Active_EMUIVersion string, Active_operaSysVersion string, Active_BacVerNumber string, Active_BacFlashVer string, Active_webUIVersion string, Active_webUITypeCarrVer string,Active_webTypeDataVerNumber string, Active_operatorsVersion string, Active
_phonePADPartitionedVersions string, Latest_YEAR int, Latest_MONTH int, Latest_DAY int, Latest_HOUR string, Latest_areaId string, Latest_country string, Latest_province string, Latest_city string, Latest_district string, Latest_street string, Latest_releaseId string, Latest_EMUIVersion string, Latest_operaSysVersion string, Latest_BacVerNumber string, Latest_BacFlashVer string, Latest_webUIVersion string, Latest_webUITypeCarrVer string, Latest_webTypeDataVerNumber string, Latest_operatorsVersion string, Latest_phonePADPartitionedVersions string, Latest_operatorId string, gamePointDescription string, gamePointId int,contractNumber int) STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('DICTIONARY_INCLUDE'='Latest_MONTH,Latest_DAY,deviceInformationId')");
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/51b23544/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/FilterProcessorTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/FilterProcessorTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/FilterProcessorTestCase.scala
index 7e29cd1..9b3ac45 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/FilterProcessorTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/FilterProcessorTestCase.scala
@@ -51,7 +51,7 @@ class FilterProcessorTestCase extends QueryTest with BeforeAndAfterAll {
)
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "MM-dd-yyyy HH:mm:ss")
-
+
sql("CREATE TABLE filterTimestampDataType (ID int, date Timestamp, country String, " +
"name String, phonetype String, serialname String, salary int) " +
"STORED BY 'org.apache.carbondata.format'"
@@ -129,7 +129,6 @@ class FilterProcessorTestCase extends QueryTest with BeforeAndAfterAll {
sql("load data local inpath './src/test/resources/big_int_Decimal.csv' into table big_int_basicc_Hive_1")
}
-
test("Is not null filter") {
checkAnswer(
sql("select id from filtertestTablesWithNull " + "where id is not null"),
[2/2] incubator-carbondata git commit: [CARBONDATA-108] Remove
project in strategy This closes #55
Posted by ch...@apache.org.
[CARBONDATA-108] Remove project in strategy This closes #55
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/739afbd7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/739afbd7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/739afbd7
Branch: refs/heads/master
Commit: 739afbd71ab5fd2cb9d6fa92dda2e0358b4bea8a
Parents: e05c0d5 51b2354
Author: chenliang613 <ch...@apache.org>
Authored: Sat Nov 26 10:29:27 2016 +0800
Committer: chenliang613 <ch...@apache.org>
Committed: Sat Nov 26 10:29:27 2016 +0800
----------------------------------------------------------------------
.../spark/sql/hive/CarbonStrategies.scala | 48 +++++++++++++-------
.../AllDataTypesTestCaseAggregate.scala | 4 +-
.../AllDataTypesTestCaseAggregate.scala | 5 ++
.../filterexpr/FilterProcessorTestCase.scala | 3 +-
4 files changed, 38 insertions(+), 22 deletions(-)
----------------------------------------------------------------------