You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2019/06/13 14:37:52 UTC
[carbondata] branch master updated: [CARBONDATA-3416]Correct the
preparing of carbon analyzer with custom rules with spark analyzer
This is an automated email from the ASF dual-hosted git repository.
kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 1118886 [CARBONDATA-3416]Correct the preparing of carbon analyzer with custom rules with spark analyzer
1118886 is described below
commit 1118886d8945c37092d9dc5208421387713da86e
Author: akashrn5 <ak...@gmail.com>
AuthorDate: Thu Jun 6 16:15:49 2019 +0530
[CARBONDATA-3416]Correct the preparing of carbon analyzer with custom rules with spark analyzer
Problem:
When a new analyzer rule is added in Spark, it is not reflected in Carbon.
Carbon prepares its session state builder by extending the
Hive session state builder, and creates a new analyzer by overriding
all the rules added by Spark, so when a new rule is added in Spark,
it is not reflected in Carbon because we have overridden the complete analyzer.
Solution:
While building the new analyzer on the Carbon side, it is better to get all the
rules from the super class and then add the Carbon rules to the analyzer.
Because we take the super class's rules (super.rules, via super.analyzer), any new rules
added on the Spark side are picked up from Spark before the Carbon custom rules are added.
This closes #3261
---
.../sql/hive/CarbonInMemorySessionState.scala | 35 +++++++++++++++-------
.../apache/spark/sql/hive/CarbonSessionState.scala | 34 +++++++++++----------
2 files changed, 43 insertions(+), 26 deletions(-)
diff --git a/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonInMemorySessionState.scala b/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonInMemorySessionState.scala
index da60fb0..e286fba 100644
--- a/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonInMemorySessionState.scala
+++ b/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonInMemorySessionState.scala
@@ -254,22 +254,35 @@ class CarbonInMemorySessionStateBuilder (sparkSession: SparkSession,
override lazy val optimizer: Optimizer = new CarbonOptimizer(catalog, conf, experimentalMethods)
- override protected def analyzer: Analyzer = new CarbonAnalyzer(catalog, conf, sparkSession,
+ override protected def analyzer: Analyzer = {
+ new CarbonAnalyzer(catalog,
+ conf,
+ sparkSession,
+ getAnalyzer(super.analyzer))
+ }
+
+ /**
+ * This method adds carbon rules to Hive Analyzer and returns new analyzer
+ *
+ * @param analyzer SessionStateBuilder analyzer
+ * @return
+ */
+ def getAnalyzer(analyzer: Analyzer): Analyzer = {
new Analyzer(catalog, conf) {
+
override val extendedResolutionRules: Seq[Rule[LogicalPlan]] =
- new FindDataSourceTable(session) +:
- new ResolveSQLOnFile(session) +:
- new CarbonIUDAnalysisRule(sparkSession) +:
- new CarbonPreInsertionCasts(sparkSession) +: customResolutionRules
+ analyzer.extendedResolutionRules ++
+ Seq(CarbonIUDAnalysisRule(sparkSession)) ++
+ Seq(CarbonPreInsertionCasts(sparkSession)) ++ customResolutionRules
+
override val extendedCheckRules: Seq[LogicalPlan => Unit] =
- PreWriteCheck :: HiveOnlyCheck :: Nil
+ analyzer.extendedCheckRules
+
override val postHocResolutionRules: Seq[Rule[LogicalPlan]] =
- PreprocessTableCreation(session) +:
- PreprocessTableInsertion(conf) +:
- DataSourceAnalysis(conf) +:
- customPostHocResolutionRules
+ analyzer.postHocResolutionRules
}
- )
+ }
+
override protected def newBuilder: NewBuilder = new CarbonInMemorySessionStateBuilder(_, _)
}
diff --git a/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonSessionState.scala b/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonSessionState.scala
index 0c60e71..08cf3cc 100644
--- a/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonSessionState.scala
+++ b/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonSessionState.scala
@@ -236,29 +236,33 @@ class CarbonSessionStateBuilder(sparkSession: SparkSession,
override lazy val optimizer: Optimizer = new CarbonOptimizer(catalog, conf, experimentalMethods)
- override protected def analyzer: Analyzer = new CarbonAnalyzer(catalog, conf, sparkSession,
+ override protected def analyzer: Analyzer = {
+ new CarbonAnalyzer(catalog,
+ conf,
+ sparkSession,
+ getAnalyzer(super.analyzer))
+ }
+
+ /**
+ * This method adds carbon rules to Hive Analyzer and returns new analyzer
+ * @param analyzer hiveSessionStateBuilder analyzer
+ * @return
+ */
+ def getAnalyzer(analyzer: Analyzer): Analyzer = {
new Analyzer(catalog, conf) {
override val extendedResolutionRules: Seq[Rule[LogicalPlan]] =
- new ResolveHiveSerdeTable(session) +:
- new FindDataSourceTable(session) +:
- new ResolveSQLOnFile(session) +:
- new CarbonIUDAnalysisRule(sparkSession) +:
- new CarbonPreInsertionCasts(sparkSession) +: customResolutionRules
+ analyzer.extendedResolutionRules ++
+ Seq(CarbonIUDAnalysisRule(sparkSession)) ++
+ Seq(CarbonPreInsertionCasts(sparkSession)) ++ customResolutionRules
override val extendedCheckRules: Seq[LogicalPlan => Unit] =
- PreWriteCheck :: HiveOnlyCheck :: Nil
+ analyzer.extendedCheckRules
override val postHocResolutionRules: Seq[Rule[LogicalPlan]] =
- new DetermineTableStats(session) +:
- RelationConversions(conf, catalog) +:
- PreprocessTableCreation(session) +:
- PreprocessTableInsertion(conf) +:
- DataSourceAnalysis(conf) +:
- HiveAnalysis +:
- customPostHocResolutionRules
+ analyzer.postHocResolutionRules
}
- )
+ }
override protected def newBuilder: NewBuilder = new CarbonSessionStateBuilder(_, _)
}
\ No newline at end of file