You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2018/10/09 15:50:37 UTC

[35/45] carbondata git commit: [CARBONDATA-2992] Fixed Between Query Data Mismatch issue for timestamp data type

[CARBONDATA-2992] Fixed Between Query Data Mismatch issue for timestamp data type

Problem:
A BETWEEN query on a timestamp column returns wrong results.
Root cause:
For a timestamp column, when the filter value is given in yyyy-MM-dd format instead of yyyy-MM-dd HH:mm:ss format, Spark adds a cast. In CastExpressionOptimization a SimpleDateFormat object is used to parse the filter value, and parsing fails because the filter value does not match the configured timestamp format.
Solution:
Use Spark's DateTimeUtils.stringToTimestamp method first, since Spark already handles the above scenario; fall back to the configured SimpleDateFormat parsers only when it cannot parse the value.

This closes #2787


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/11bd0ade
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/11bd0ade
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/11bd0ade

Branch: refs/heads/branch-1.5
Commit: 11bd0ade93a3ac72b42068c3b57ed8bb1203ab47
Parents: 6aa2a90
Author: kumarvishal09 <ku...@gmail.com>
Authored: Fri Sep 28 18:33:29 2018 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Thu Oct 4 18:02:08 2018 +0530

----------------------------------------------------------------------
 .../src/test/resources/datedatafile.csv         |  7 ++
 .../src/test/resources/timestampdatafile.csv    |  7 ++
 ...imestampNoDictionaryColumnCastTestCase.scala | 80 ++++++++++++++++++++
 .../execution/CastExpressionOptimization.scala  | 67 +++++++++-------
 .../bloom/BloomCoarseGrainDataMapSuite.scala    | 47 +++++-------
 5 files changed, 152 insertions(+), 56 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark-common-test/src/test/resources/datedatafile.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/datedatafile.csv b/integration/spark-common-test/src/test/resources/datedatafile.csv
new file mode 100644
index 0000000..43a615d
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/datedatafile.csv
@@ -0,0 +1,7 @@
+datetype1
+2018-09-11
+2018-09-12
+2018-09-13
+2018-09-14
+2018-09-15
+2018-09-16

http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark-common-test/src/test/resources/timestampdatafile.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/timestampdatafile.csv b/integration/spark-common-test/src/test/resources/timestampdatafile.csv
new file mode 100644
index 0000000..473f330
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/timestampdatafile.csv
@@ -0,0 +1,7 @@
+timestamptype
+2018-09-11 00:00:00
+2018-09-12 00:00:00
+2018-09-13 00:00:00
+2018-09-14 00:00:00
+2018-09-15 00:00:00
+2018-09-16 00:00:00

http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala
new file mode 100644
index 0000000..41c7005
--- /dev/null
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.directdictionary
+
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.test.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+/**
+ * Test Class for detailed query on timestamp datatypes
+ */
+class TimestampNoDictionaryColumnCastTestCase extends QueryTest with BeforeAndAfterAll {
+
+  override def beforeAll {
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+        CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
+        CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
+
+      sql("drop table if exists timestamp_nodictionary")
+    sql("drop table if exists datetype")
+      sql(
+        """
+         CREATE TABLE IF NOT EXISTS timestamp_nodictionary
+        (timestamptype timestamp) STORED BY 'carbondata'"""
+      )
+      val csvFilePath = s"$resourcesPath/timestampdatafile.csv"
+      sql(s"LOAD DATA LOCAL INPATH '$csvFilePath' into table timestamp_nodictionary")
+//
+    sql(
+      """
+         CREATE TABLE IF NOT EXISTS datetype
+        (datetype1 date) STORED BY 'carbondata'"""
+    )
+    val csvFilePath1 = s"$resourcesPath/datedatafile.csv"
+    sql(s"LOAD DATA LOCAL INPATH '$csvFilePath1' into table datetype")
+  }
+
+  test("select count(*) from timestamp_nodictionary where timestamptype BETWEEN '2018-09-11' AND '2018-09-16'") {
+    checkAnswer(
+      sql("select count(*) from timestamp_nodictionary where timestamptype BETWEEN '2018-09-11' AND '2018-09-16'"),
+      Seq(Row(6)
+      )
+    )
+  }
+//
+  test("select count(*) from datetype where datetype1 BETWEEN '2018-09-11' AND '2018-09-16'") {
+    checkAnswer(
+      sql("select count(*) from datetype where datetype1 BETWEEN '2018-09-11' AND '2018-09-16'"),
+      Seq(Row(6)
+      )
+    )
+  }
+
+  override def afterAll {
+    sql("drop table timestamp_nodictionary")
+    sql("drop table if exists datetype")
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
index 7e61814..57fb3f0 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
@@ -29,53 +29,66 @@ import org.apache.spark.sql.FalseExpr
 import org.apache.spark.sql.sources
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.CarbonExpressions.{MatchCast => Cast}
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.sql.sources.Filter
+import org.apache.spark.unsafe.types.UTF8String
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.util.CarbonProperties
 
 object CastExpressionOptimization {
 
-
   def typeCastStringToLong(v: Any, dataType: DataType): Any = {
-    var parser: SimpleDateFormat = null
-    if (dataType == TimestampType) {
-      parser = new SimpleDateFormat(CarbonProperties.getInstance
-        .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
-          CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT))
-    } else if (dataType == DateType) {
-      parser = new SimpleDateFormat(CarbonProperties.getInstance
-        .getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
-          CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT))
-      parser.setTimeZone(TimeZone.getTimeZone("GMT"))
-    } else {
-      throw new UnsupportedOperationException("Unsupported DataType being evaluated.")
-    }
-    try {
-      val value = parser.parse(v.toString).getTime() * 1000L
-      value
-    } catch {
-      case e: ParseException =>
+    if (dataType == TimestampType || dataType == DateType) {
+      val value = if (dataType == TimestampType) {
+        DateTimeUtils.stringToTimestamp(UTF8String.fromString(v.toString))
+      } else {
+        None
+      }
+      if (value.isDefined) {
+        value.get
+      } else {
+        var parser: SimpleDateFormat = null
+        if (dataType == TimestampType) {
+          parser = new SimpleDateFormat(CarbonProperties.getInstance
+            .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+              CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT))
+        } else if (dataType == DateType) {
+          parser = new SimpleDateFormat(CarbonProperties.getInstance
+            .getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
+              CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT))
+          parser.setTimeZone(TimeZone.getTimeZone("GMT"))
+        }
         try {
-          val parsenew: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz")
-          parsenew.parse(v.toString).getTime() * 1000L
+          val value = parser.parse(v.toString).getTime() * 1000L
+          value
         } catch {
           case e: ParseException =>
-            val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US)
-            gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
             try {
-              gmtDay.parse(v.toString).getTime() * 1000L
+              val parsenew: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz")
+              parsenew.parse(v.toString).getTime() * 1000L
             } catch {
               case e: ParseException =>
-                v
+                val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US)
+                gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
+                try {
+                  gmtDay.parse(v.toString).getTime() * 1000L
+                } catch {
+                  case e: ParseException =>
+                    v
+                  case e: Exception =>
+                    v
+                }
               case e: Exception =>
                 v
             }
           case e: Exception =>
             v
         }
-      case e: Exception =>
-        v
+      }
+    }
+    else {
+      throw new UnsupportedOperationException("Unsupported DataType being evaluated.")
     }
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
index 3360530..3b5b5ca 100644
--- a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
+++ b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
@@ -756,17 +756,10 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with
   }
 
   test("test bloom datamap on all basic data types") {
-    val originTimestampFormat = CarbonProperties.getInstance().getProperty(
-      CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
-      CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
-    val originDateFormat = CarbonProperties.getInstance().getProperty(
-      CarbonCommonConstants.CARBON_DATE_FORMAT,
-      CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
-
     CarbonProperties.getInstance().addProperty(
-      CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd HH:mm:ss")
+      CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
     CarbonProperties.getInstance().addProperty(
-      CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy/MM/dd")
+      CarbonCommonConstants.CARBON_DATE_FORMAT, CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
 
     val columnNames = "booleanField,shortField,intField,bigintField,doubleField,stringField," +
       "timestampField,decimalField,dateField,charField,floatField"
@@ -811,17 +804,17 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with
     sql(
       s"""
          | INSERT INTO TABLE $bloomDMSampleTable
-         | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5),
-         | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15),
-         | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5),
+         | VALUES(true,1,10,100,48.4,'spark','2015-4-23 12:01:01',1.23,'2015-4-23','aaa',2.5),
+         | (true,1,11,100,44.4,'flink','2015-5-23 12:01:03',23.23,'2015-5-23','ccc',2.15),
+         | (true,3,14,160,43.4,'hive','2015-7-26 12:01:06',3454.32,'2015-7-26','ff',5.5),
          | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
        """.stripMargin)
     sql(
       s"""
          | INSERT INTO TABLE $normalTable
-         | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5),
-         | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15),
-         | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5),
+         | VALUES(true,1,10,100,48.4,'spark','2015-4-23 12:01:01',1.23,'2015-4-23','aaa',2.5),
+         | (true,1,11,100,44.4,'flink','2015-5-23 12:01:03',23.23,'2015-5-23','ccc',2.15),
+         | (true,3,14,160,43.4,'hive','2015-7-26 12:01:06',3454.32,'2015-7-26','ff',5.5),
          | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
        """.stripMargin)
 
@@ -838,17 +831,17 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with
     sql(
       s"""
          | INSERT INTO TABLE $bloomDMSampleTable
-         | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5),
-         | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15),
-         | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5),
+         | VALUES(true,1,10,100,48.4,'spark','2015-4-23 12:01:01',1.23,'2015-4-23','aaa',2.5),
+         | (true,1,11,100,44.4,'flink','2015-5-23 12:01:03',23.23,'2015-5-23','ccc',2.15),
+         | (true,3,14,160,43.4,'hive','2015-7-26 12:01:06',3454.32,'2015-7-26','ff',5.5),
          | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
        """.stripMargin)
     sql(
       s"""
          | INSERT INTO TABLE $normalTable
-         | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5),
-         | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15),
-         | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5),
+         | VALUES(true,1,10,100,48.4,'spark','2015-4-23 12:01:01',1.23,'2015-4-23','aaa',2.5),
+         | (true,1,11,100,44.4,'flink','2015-5-23 12:01:03',23.23,'2015-5-23','ccc',2.15),
+         | (true,3,14,160,43.4,'hive','2015-7-26 12:01:06',3454.32,'2015-7-26','ff',5.5),
          | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
        """.stripMargin)
 
@@ -866,12 +859,12 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with
     checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE stringField = 'spark'"),
       sql(s"SELECT * FROM $normalTable WHERE stringField = 'spark'"))
     checkAnswer(
-      sql(s"SELECT * FROM $bloomDMSampleTable WHERE timestampField = '2015/7/26 12:01:06'"),
-      sql(s"SELECT * FROM $normalTable WHERE timestampField = '2015/7/26 12:01:06'"))
+      sql(s"SELECT * FROM $bloomDMSampleTable WHERE timestampField = '2015-7-26 12:01:06'"),
+      sql(s"SELECT * FROM $normalTable WHERE timestampField = '2015-7-26 12:01:06'"))
     checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE decimalField = 23.23"),
       sql(s"SELECT * FROM $normalTable WHERE decimalField = 23.23"))
-    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE dateField = '2015/4/23'"),
-      sql(s"SELECT * FROM $normalTable WHERE dateField = '2015/4/23'"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE dateField = '2015-4-23'"),
+      sql(s"SELECT * FROM $normalTable WHERE dateField = '2015-4-23'"))
     checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE charField = 'ccc'"),
       sql(s"SELECT * FROM $normalTable WHERE charField = 'ccc'"))
     checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE floatField = 2.5"),
@@ -919,10 +912,6 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with
     checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE floatField = 0"),
       sql(s"SELECT * FROM $normalTable WHERE floatField = 0"))
 
-    CarbonProperties.getInstance().addProperty(
-      CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, originTimestampFormat)
-    CarbonProperties.getInstance().addProperty(
-      CarbonCommonConstants.CARBON_DATE_FORMAT, originDateFormat)
   }
 
   test("test bloom datamap on multiple columns") {