You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by aj...@apache.org on 2020/12/01 16:04:58 UTC
[carbondata] branch master updated: [CARBONDATA-4064] Fix tpcds query failure with SI

This is an automated email from the ASF dual-hosted git repository.

ajantha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 86080cd  [CARBONDATA-4064] Fix tpcds query failure with SI
86080cd is described below

commit 86080cdb20aa5a251d279f8787ffb747d85bc83d
Author: Indhumathi27 <in...@gmail.com>
AuthorDate: Fri Nov 27 19:29:38 2020 +0530

    [CARBONDATA-4064] Fix tpcds query failure with SI
    
    Why is this PR needed?
    TPCDS queries fail with a None.get exception when a secondary index (SI) is configured for some of the tables.
    
    What changes were proposed in this PR?
    In checkIfPushDownOrderByLimitAndNotNullFilter, check whether a parent relation is found for the filter's child; if none is found, return false instead of calling .get on the empty Option.
    
    Does this PR introduce any user interface change?
    No
    
    Is any new testcase added?
    Yes, a test case is added to TestIndexModelWithAggQueries.
    
    This closes #4030
---
 .../secondaryindex/TestIndexModelWithAggQueries.scala    | 16 ++++++++++++++++
 .../optimizer/CarbonSecondaryIndexOptimizer.scala        |  7 ++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestIndexModelWithAggQueries.scala b/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestIndexModelWithAggQueries.scala
index a1336f0..5255ebd 100644
--- a/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestIndexModelWithAggQueries.scala
+++ b/index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestIndexModelWithAggQueries.scala
@@ -171,6 +171,22 @@ class TestIndexModelWithAggQueries extends QueryTest with BeforeAndAfterAll {
     assert(FileFactory.isFileExist(indexTable.getSegmentPath("1")))
   }
 
+  test("test pushing down filter for broadcast join with ISnotNull Filter and " +
+       "order by and Join with SI") {
+    sql("drop table if exists catalog_returns")
+    sql("drop table if exists date_dim")
+    sql("create table catalog_returns(cr_returned_date_sk string)  STORED AS carbondata ")
+    sql("create table date_dim( d_date_sk string) STORED AS carbondata")
+    sql("insert into catalog_returns select 2450926")
+    sql("insert into date_dim select 2450926")
+    val query = "SELECT  c.cr_returned_date_sk cr_returned_date_sk, count(*) cnt " +
+                "FROM catalog_returns c, date_dim d WHERE d.d_date_sk = c.cr_returned_date_sk " +
+                "group by c.cr_returned_date_sk having count(*) >=1 order by cnt limit 5"
+    val result = sql(query)
+    sql("create index index_si on table date_dim(d_date_sk) AS 'carbondata' ")
+    checkAnswer(result, sql(query))
+  }
+
   override def afterAll: Unit = {
     sql("drop table if exists source")
     sql("drop table if exists catalog_return")
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala
index 59c3c64..81f96ed 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/optimizer/CarbonSecondaryIndexOptimizer.scala
@@ -943,7 +943,12 @@ class CarbonSecondaryIndexOptimizer(sparkSession: SparkSession) {
     val filterAttributes = filter.condition collect {
       case attr: AttributeReference => attr.name.toLowerCase
     }
-    val parentTableRelation = MatchIndexableRelation.unapply(filter.child).get
+    // get the parent table logical relation from the filter node
+    val parentRelation = MatchIndexableRelation.unapply(filter.child)
+    if (parentRelation.isEmpty) {
+      return false
+    }
+    val parentTableRelation = parentRelation.get
     val matchingIndexTables = CarbonCostBasedOptimizer.identifyRequiredTables(
       filterAttributes.toSet.asJava,
       CarbonIndexUtil.getSecondaryIndexes(parentTableRelation).mapValues(_.toList.asJava).asJava)