You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2018/10/09 15:50:37 UTC
[35/45] carbondata git commit: [CARBONDATA-2992] Fixed Between Query
Data Mismatch issue for timestamp data type
[CARBONDATA-2992] Fixed Between Query Data Mismatch issue for timestamp data type
Problem:
A BETWEEN query on a timestamp column returns incorrect results.
Root cause:
For the timestamp type, when the filter value is given in yyyy-MM-dd format instead of yyyy-MM-dd HH:mm:ss format, a cast is added. In CastExpressionOptimization, a SimpleDateFormat object is used to parse the filter value, and parsing fails because the filter value does not match the configured timestamp format.
Solution:
Use Spark's DateTimeUtils.stringToTimestamp method, since Spark already handles the above scenario.
This closes #2787
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/11bd0ade
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/11bd0ade
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/11bd0ade
Branch: refs/heads/branch-1.5
Commit: 11bd0ade93a3ac72b42068c3b57ed8bb1203ab47
Parents: 6aa2a90
Author: kumarvishal09 <ku...@gmail.com>
Authored: Fri Sep 28 18:33:29 2018 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Thu Oct 4 18:02:08 2018 +0530
----------------------------------------------------------------------
.../src/test/resources/datedatafile.csv | 7 ++
.../src/test/resources/timestampdatafile.csv | 7 ++
...imestampNoDictionaryColumnCastTestCase.scala | 80 ++++++++++++++++++++
.../execution/CastExpressionOptimization.scala | 67 +++++++++-------
.../bloom/BloomCoarseGrainDataMapSuite.scala | 47 +++++-------
5 files changed, 152 insertions(+), 56 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark-common-test/src/test/resources/datedatafile.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/datedatafile.csv b/integration/spark-common-test/src/test/resources/datedatafile.csv
new file mode 100644
index 0000000..43a615d
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/datedatafile.csv
@@ -0,0 +1,7 @@
+datetype1
+2018-09-11
+2018-09-12
+2018-09-13
+2018-09-14
+2018-09-15
+2018-09-16
http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark-common-test/src/test/resources/timestampdatafile.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/timestampdatafile.csv b/integration/spark-common-test/src/test/resources/timestampdatafile.csv
new file mode 100644
index 0000000..473f330
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/timestampdatafile.csv
@@ -0,0 +1,7 @@
+timestamptype
+2018-09-11 00:00:00
+2018-09-12 00:00:00
+2018-09-13 00:00:00
+2018-09-14 00:00:00
+2018-09-15 00:00:00
+2018-09-16 00:00:00
http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala
new file mode 100644
index 0000000..41c7005
--- /dev/null
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.spark.testsuite.directdictionary
+
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.test.util.QueryTest
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
+/**
+ * Test Class for detailed query on timestamp datatypes
+ */
+class TimestampNoDictionaryColumnCastTestCase extends QueryTest with BeforeAndAfterAll {
+
+ override def beforeAll {
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+ CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
+ CarbonProperties.getInstance()
+ .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
+ CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
+
+ sql("drop table if exists timestamp_nodictionary")
+ sql("drop table if exists datetype")
+ sql(
+ """
+ CREATE TABLE IF NOT EXISTS timestamp_nodictionary
+ (timestamptype timestamp) STORED BY 'carbondata'"""
+ )
+ val csvFilePath = s"$resourcesPath/timestampdatafile.csv"
+ sql(s"LOAD DATA LOCAL INPATH '$csvFilePath' into table timestamp_nodictionary")
+//
+ sql(
+ """
+ CREATE TABLE IF NOT EXISTS datetype
+ (datetype1 date) STORED BY 'carbondata'"""
+ )
+ val csvFilePath1 = s"$resourcesPath/datedatafile.csv"
+ sql(s"LOAD DATA LOCAL INPATH '$csvFilePath1' into table datetype")
+ }
+
+ test("select count(*) from timestamp_nodictionary where timestamptype BETWEEN '2018-09-11' AND '2018-09-16'") {
+ checkAnswer(
+ sql("select count(*) from timestamp_nodictionary where timestamptype BETWEEN '2018-09-11' AND '2018-09-16'"),
+ Seq(Row(6)
+ )
+ )
+ }
+//
+ test("select count(*) from datetype where datetype1 BETWEEN '2018-09-11' AND '2018-09-16'") {
+ checkAnswer(
+ sql("select count(*) from datetype where datetype1 BETWEEN '2018-09-11' AND '2018-09-16'"),
+ Seq(Row(6)
+ )
+ )
+ }
+
+ override def afterAll {
+ sql("drop table timestamp_nodictionary")
+ sql("drop table if exists datetype")
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
index 7e61814..57fb3f0 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CastExpressionOptimization.scala
@@ -29,53 +29,66 @@ import org.apache.spark.sql.FalseExpr
import org.apache.spark.sql.sources
import org.apache.spark.sql.types._
import org.apache.spark.sql.CarbonExpressions.{MatchCast => Cast}
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.sources.Filter
+import org.apache.spark.unsafe.types.UTF8String
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties
object CastExpressionOptimization {
-
def typeCastStringToLong(v: Any, dataType: DataType): Any = {
- var parser: SimpleDateFormat = null
- if (dataType == TimestampType) {
- parser = new SimpleDateFormat(CarbonProperties.getInstance
- .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
- CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT))
- } else if (dataType == DateType) {
- parser = new SimpleDateFormat(CarbonProperties.getInstance
- .getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
- CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT))
- parser.setTimeZone(TimeZone.getTimeZone("GMT"))
- } else {
- throw new UnsupportedOperationException("Unsupported DataType being evaluated.")
- }
- try {
- val value = parser.parse(v.toString).getTime() * 1000L
- value
- } catch {
- case e: ParseException =>
+ if (dataType == TimestampType || dataType == DateType) {
+ val value = if (dataType == TimestampType) {
+ DateTimeUtils.stringToTimestamp(UTF8String.fromString(v.toString))
+ } else {
+ None
+ }
+ if (value.isDefined) {
+ value.get
+ } else {
+ var parser: SimpleDateFormat = null
+ if (dataType == TimestampType) {
+ parser = new SimpleDateFormat(CarbonProperties.getInstance
+ .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+ CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT))
+ } else if (dataType == DateType) {
+ parser = new SimpleDateFormat(CarbonProperties.getInstance
+ .getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
+ CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT))
+ parser.setTimeZone(TimeZone.getTimeZone("GMT"))
+ }
try {
- val parsenew: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz")
- parsenew.parse(v.toString).getTime() * 1000L
+ val value = parser.parse(v.toString).getTime() * 1000L
+ value
} catch {
case e: ParseException =>
- val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US)
- gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
try {
- gmtDay.parse(v.toString).getTime() * 1000L
+ val parsenew: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz")
+ parsenew.parse(v.toString).getTime() * 1000L
} catch {
case e: ParseException =>
- v
+ val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US)
+ gmtDay.setTimeZone(TimeZone.getTimeZone("GMT"))
+ try {
+ gmtDay.parse(v.toString).getTime() * 1000L
+ } catch {
+ case e: ParseException =>
+ v
+ case e: Exception =>
+ v
+ }
case e: Exception =>
v
}
case e: Exception =>
v
}
- case e: Exception =>
- v
+ }
+ }
+ else {
+ throw new UnsupportedOperationException("Unsupported DataType being evaluated.")
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
index 3360530..3b5b5ca 100644
--- a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
+++ b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
@@ -756,17 +756,10 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with
}
test("test bloom datamap on all basic data types") {
- val originTimestampFormat = CarbonProperties.getInstance().getProperty(
- CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
- CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
- val originDateFormat = CarbonProperties.getInstance().getProperty(
- CarbonCommonConstants.CARBON_DATE_FORMAT,
- CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
-
CarbonProperties.getInstance().addProperty(
- CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd HH:mm:ss")
+ CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
CarbonProperties.getInstance().addProperty(
- CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy/MM/dd")
+ CarbonCommonConstants.CARBON_DATE_FORMAT, CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
val columnNames = "booleanField,shortField,intField,bigintField,doubleField,stringField," +
"timestampField,decimalField,dateField,charField,floatField"
@@ -811,17 +804,17 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with
sql(
s"""
| INSERT INTO TABLE $bloomDMSampleTable
- | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5),
- | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15),
- | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5),
+ | VALUES(true,1,10,100,48.4,'spark','2015-4-23 12:01:01',1.23,'2015-4-23','aaa',2.5),
+ | (true,1,11,100,44.4,'flink','2015-5-23 12:01:03',23.23,'2015-5-23','ccc',2.15),
+ | (true,3,14,160,43.4,'hive','2015-7-26 12:01:06',3454.32,'2015-7-26','ff',5.5),
| (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
""".stripMargin)
sql(
s"""
| INSERT INTO TABLE $normalTable
- | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5),
- | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15),
- | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5),
+ | VALUES(true,1,10,100,48.4,'spark','2015-4-23 12:01:01',1.23,'2015-4-23','aaa',2.5),
+ | (true,1,11,100,44.4,'flink','2015-5-23 12:01:03',23.23,'2015-5-23','ccc',2.15),
+ | (true,3,14,160,43.4,'hive','2015-7-26 12:01:06',3454.32,'2015-7-26','ff',5.5),
| (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
""".stripMargin)
@@ -838,17 +831,17 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with
sql(
s"""
| INSERT INTO TABLE $bloomDMSampleTable
- | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5),
- | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15),
- | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5),
+ | VALUES(true,1,10,100,48.4,'spark','2015-4-23 12:01:01',1.23,'2015-4-23','aaa',2.5),
+ | (true,1,11,100,44.4,'flink','2015-5-23 12:01:03',23.23,'2015-5-23','ccc',2.15),
+ | (true,3,14,160,43.4,'hive','2015-7-26 12:01:06',3454.32,'2015-7-26','ff',5.5),
| (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
""".stripMargin)
sql(
s"""
| INSERT INTO TABLE $normalTable
- | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5),
- | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15),
- | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5),
+ | VALUES(true,1,10,100,48.4,'spark','2015-4-23 12:01:01',1.23,'2015-4-23','aaa',2.5),
+ | (true,1,11,100,44.4,'flink','2015-5-23 12:01:03',23.23,'2015-5-23','ccc',2.15),
+ | (true,3,14,160,43.4,'hive','2015-7-26 12:01:06',3454.32,'2015-7-26','ff',5.5),
| (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
""".stripMargin)
@@ -866,12 +859,12 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with
checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE stringField = 'spark'"),
sql(s"SELECT * FROM $normalTable WHERE stringField = 'spark'"))
checkAnswer(
- sql(s"SELECT * FROM $bloomDMSampleTable WHERE timestampField = '2015/7/26 12:01:06'"),
- sql(s"SELECT * FROM $normalTable WHERE timestampField = '2015/7/26 12:01:06'"))
+ sql(s"SELECT * FROM $bloomDMSampleTable WHERE timestampField = '2015-7-26 12:01:06'"),
+ sql(s"SELECT * FROM $normalTable WHERE timestampField = '2015-7-26 12:01:06'"))
checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE decimalField = 23.23"),
sql(s"SELECT * FROM $normalTable WHERE decimalField = 23.23"))
- checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE dateField = '2015/4/23'"),
- sql(s"SELECT * FROM $normalTable WHERE dateField = '2015/4/23'"))
+ checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE dateField = '2015-4-23'"),
+ sql(s"SELECT * FROM $normalTable WHERE dateField = '2015-4-23'"))
checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE charField = 'ccc'"),
sql(s"SELECT * FROM $normalTable WHERE charField = 'ccc'"))
checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE floatField = 2.5"),
@@ -919,10 +912,6 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with
checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE floatField = 0"),
sql(s"SELECT * FROM $normalTable WHERE floatField = 0"))
- CarbonProperties.getInstance().addProperty(
- CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, originTimestampFormat)
- CarbonProperties.getInstance().addProperty(
- CarbonCommonConstants.CARBON_DATE_FORMAT, originDateFormat)
}
test("test bloom datamap on multiple columns") {