You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by xx...@apache.org on 2021/04/21 11:55:44 UTC

[kylin] branch kylin-on-parquet-v2 updated: KYLIN-4965 Fix the 'can not resolve columns' error when create model and add lookup table column as filter condition

This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch kylin-on-parquet-v2
in repository https://gitbox.apache.org/repos/asf/kylin.git


The following commit(s) were added to refs/heads/kylin-on-parquet-v2 by this push:
     new 279fbf3  KYLIN-4965 Fix the 'can not resolve columns' error when create model and add lookup table column as filter condition
279fbf3 is described below

commit 279fbf34b71d00ef71ac8a931e03a50e6f8a078b
Author: Zhichao Zhang <zh...@apache.org>
AuthorDate: Sun Apr 11 14:08:48 2021 +0800

    KYLIN-4965 Fix the 'can not resolve columns' error when create model and add lookup table column as filter condition
---
 .gitignore                                                         | 2 ++
 .../test_case_data/parquet_test/model_desc/ci_left_join_model.json | 2 +-
 .../org/apache/kylin/engine/spark/builder/CreateFlatTable.scala    | 7 ++++++-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 9213fd0..a5c3f92 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,8 @@ release.properties
 #IDEA
 *.iml
 .settings
+*.ipr
+*.iws
 
 # External tool builders
 .externalToolBuilders/
diff --git a/examples/test_case_data/parquet_test/model_desc/ci_left_join_model.json b/examples/test_case_data/parquet_test/model_desc/ci_left_join_model.json
index 233c64f..57fa59f 100644
--- a/examples/test_case_data/parquet_test/model_desc/ci_left_join_model.json
+++ b/examples/test_case_data/parquet_test/model_desc/ci_left_join_model.json
@@ -223,7 +223,7 @@
     "TEST_KYLIN_FACT.ITEM_COUNT"
   ],
   "last_modified": 1422435345352,
-  "filter_condition": null,
+  "filter_condition": "TEST_CAL_DT.QTR_BEG_DT < DATE '2021-01-01' and TEST_KYLIN_FACT.TRANS_ID < 10000",
   "partition_desc": {
     "partition_date_column": "DEFAULT.TEST_KYLIN_FACT.CAL_DT",
     "partition_date_start": 0,
diff --git a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
index e920ddb..6e8dc22 100644
--- a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
+++ b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/kylin/engine/spark/builder/CreateFlatTable.scala
@@ -44,7 +44,6 @@ class CreateFlatTable(val seg: SegmentInfo,
 
     val ccCols = seg.allColumns.filter(_.isInstanceOf[ComputedColumnDesc]).toSet
     var rootFactDataset = generateTableDataset(seg.factTable, ccCols.toSeq, ss, seg.project)
-    rootFactDataset = applyFilterCondition(seg, rootFactDataset)
 
     logInfo(s"Create flattable need join lookup tables $needJoin, need encode cols $needEncode")
 
@@ -58,6 +57,9 @@ class CreateFlatTable(val seg: SegmentInfo,
         val allTableDataset = Seq(rootFactDataset) ++ encodedLookupMap.map(_._2)
 
         rootFactDataset = joinFactTableWithLookupTables(rootFactDataset, encodedLookupMap, seg, ss)
+        // KYLIN-4965: The filter condition must be applied after joining the lookup tables,
+        // because some of the filter columns may belong to lookup tables.
+        rootFactDataset = applyFilterCondition(seg, rootFactDataset)
         rootFactDataset = encodeWithCols(rootFactDataset,
           filterCols(allTableDataset, ccCols),
           filterCols(allTableDataset, toBuildDictSet),
@@ -65,6 +67,9 @@ class CreateFlatTable(val seg: SegmentInfo,
       case (true, false) =>
         val lookupTableDatasetMap = generateLookupTableDataset(seg, ccCols.toSeq, ss)
         rootFactDataset = joinFactTableWithLookupTables(rootFactDataset, lookupTableDatasetMap, seg, ss)
+        // KYLIN-4965: The filter condition must be applied after joining the lookup tables,
+        // because some of the filter columns may belong to lookup tables.
+        rootFactDataset = applyFilterCondition(seg, rootFactDataset)
         rootFactDataset = withColumn(rootFactDataset, ccCols)
       case (false, true) =>
         val (dictCols, encodeCols) = (seg.toBuildDictColumns, seg.allDictColumns)