Posted to commits@carbondata.apache.org by gv...@apache.org on 2018/06/08 11:40:35 UTC

[10/50] [abbrv] carbondata git commit: [CARBONDATA-2487] Block filters for lucene with more than one text_match udf

[CARBONDATA-2487] Block filters for Lucene with more than one TEXT_MATCH UDF

When a query's filter contains more than one Lucene text-match UDF (TextMatchUDF or TextMatchMaxDocUDF), CarbonLateDecodeStrategy now throws a MalformedCarbonCommandException asking the user to specify all search filters within a single TEXT_MATCH UDF.

This closes #2311
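
For illustration only (these queries are adapted from the test added below and are not part of the commit itself): the first statement is now rejected, while the second keeps all search terms inside a single TEXT_MATCH UDF by using Lucene's own boolean query syntax, which I assume is the intended rewrite.

    // Rejected after this commit: two TEXT_MATCH UDFs in one filter
    sql("SELECT * FROM datamap_test_table WHERE TEXT_MATCH('name:n0*') AND TEXT_MATCH('city:c0*')").show()

    // Presumed equivalent: a single TEXT_MATCH carrying a Lucene boolean query
    sql("SELECT * FROM datamap_test_table WHERE TEXT_MATCH('name:n0* AND city:c0*')").show()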


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/7cba44b9
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/7cba44b9
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/7cba44b9

Branch: refs/heads/spark-2.3
Commit: 7cba44b9036dfd39cdc8f16b29cdfa5314c85977
Parents: 0e01197
Author: Indhumathi27 <in...@gmail.com>
Authored: Wed May 16 16:48:30 2018 +0530
Committer: Jacky Li <ja...@qq.com>
Committed: Mon May 21 21:58:35 2018 +0800

----------------------------------------------------------------------
 .../lucene/LuceneFineGrainDataMapSuite.scala    | 26 +++++++++++++++-
 .../strategy/CarbonLateDecodeStrategy.scala     | 31 +++++++++++++++-----
 2 files changed, 48 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/7cba44b9/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala
index b90d190..0e885de 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala
@@ -730,7 +730,7 @@ class LuceneFineGrainDataMapSuite extends QueryTest with BeforeAndAfterAll {
     assert(msg.getCause.getMessage.contains("TEXT_MATCH is not supported on table"))
     sql("DROP TABLE table1")
   }
-
+  
   test("test lucene with flush_cache as true") {
     sql("DROP TABLE IF EXISTS datamap_test_table")
     sql(
@@ -775,6 +775,30 @@ class LuceneFineGrainDataMapSuite extends QueryTest with BeforeAndAfterAll {
     sql("drop datamap if exists dm_split_false on table datamap_test_table")
   }
 
+  test("test text_match filters with more than one text_match udf ") {
+    sql("DROP TABLE IF EXISTS datamap_test_table")
+    sql(
+      """
+        | CREATE TABLE datamap_test_table(id INT, name STRING, city STRING, age INT)
+        | STORED BY 'carbondata'
+        | TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='LOCAL_SORT')
+      """.stripMargin)
+    sql(
+      s"""
+         | CREATE DATAMAP dm_text ON TABLE datamap_test_table
+         | USING 'lucene'
+         | DMProperties('INDEX_COLUMNS'='name , city')
+      """.stripMargin)
+    sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE datamap_test_table OPTIONS('header'='false')")
+    val msg = intercept[MalformedCarbonCommandException] {
+      sql("SELECT * FROM datamap_test_table WHERE TEXT_MATCH('name:n0*') AND TEXT_MATCH" +
+          "('city:c0*')").show()
+    }
+    assert(msg.getMessage
+      .contains("Specify all search filters for Lucene within a single text_match UDF"))
+    sql("drop datamap if exists dm_text on table datamap_test_table")
+  }
+
   override protected def afterAll(): Unit = {
     LuceneFineGrainDataMapSuite.deleteFile(file2)
     sql("DROP TABLE IF EXISTS normal_test")

http://git-wip-us.apache.org/repos/asf/carbondata/blob/7cba44b9/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala
index bca0946..df4c742 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala
@@ -481,14 +481,29 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy {
 
     // For conciseness, all Catalyst filter expressions of type `expressions.Expression` below are
     // called `predicate`s, while all data source filters of type `sources.Filter` are simply called
-    // `filter`s.
-
-    val translated: Seq[(Expression, Filter)] =
-      for {
-        predicate <- predicatesWithoutComplex
-        filter <- translateFilter(predicate)
-      } yield predicate -> filter
-
+    // `filter`s. And block filters for lucene with more than one text_match udf
+    // Todo: handle when lucene and normal query filter is supported
+
+    var count = 0
+    val translated: Seq[(Expression, Filter)] = predicatesWithoutComplex.flatMap {
+      predicate =>
+        if (predicate.isInstanceOf[ScalaUDF]) {
+          predicate match {
+            case u: ScalaUDF if u.function.isInstanceOf[TextMatchUDF] ||
+                                u.function.isInstanceOf[TextMatchMaxDocUDF] => count = count + 1
+          }
+        }
+        if (count > 1) {
+          throw new MalformedCarbonCommandException(
+            "Specify all search filters for Lucene within a single text_match UDF")
+        }
+        val filter = translateFilter(predicate)
+        if (filter.isDefined) {
+          Some(predicate, filter.get)
+        } else {
+          None
+        }
+    }
 
     // A map from original Catalyst expressions to corresponding translated data source filters.
     val translatedMap: Map[Expression, Filter] = translated.toMap
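
A minimal, self-contained sketch of the guard added above, using stand-in types (TextMatch, TextMatchMaxDoc, Other and the exception class here are simplified placeholders, not the real Spark ScalaUDF or Carbon classes): walk the pushed-down predicates, count the Lucene text-match UDFs, and fail fast as soon as a second one is seen.

    // Stand-ins for the predicate kinds the real strategy distinguishes.
    sealed trait Predicate
    case class TextMatch(query: String) extends Predicate                    // TEXT_MATCH(...)
    case class TextMatchMaxDoc(query: String, limit: Int) extends Predicate  // limited variant
    case class Other(description: String) extends Predicate                  // any non-Lucene filter

    class MalformedCarbonCommandException(msg: String) extends Exception(msg)

    object LuceneFilterGuard {
      // Mirrors the count-and-throw logic in CarbonLateDecodeStrategy: at most one
      // Lucene text-match UDF may appear among the pushed-down filter predicates.
      def checkSingleTextMatch(predicates: Seq[Predicate]): Unit = {
        var count = 0
        predicates.foreach {
          case _: TextMatch | _: TextMatchMaxDoc =>
            count += 1
            if (count > 1) {
              throw new MalformedCarbonCommandException(
                "Specify all search filters for Lucene within a single text_match UDF")
            }
          case _: Other => // ordinary filters are not restricted by this check
        }
      }
    }

For example, checkSingleTextMatch(Seq(TextMatch("name:n0*"), TextMatch("city:c0*"))) throws, while checkSingleTextMatch(Seq(TextMatch("name:n0*"), Other("age > 20"))) returns normally.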