You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2019/07/16 03:43:05 UTC

[spark] branch master updated: [SPARK-28408][SQL][TEST] Restrict test values for DateType, TimestampType and CalendarIntervalType

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 8e26d4d  [SPARK-28408][SQL][TEST] Restrict test values for DateType, TimestampType and CalendarIntervalType
8e26d4d is described below

commit 8e26d4d61638af8afab836c47624a8567aceefbb
Author: Maxim Gekk <ma...@gmail.com>
AuthorDate: Mon Jul 15 20:42:33 2019 -0700

    [SPARK-28408][SQL][TEST] Restrict test values for DateType, TimestampType and CalendarIntervalType
    
    ## What changes were proposed in this pull request?
    
    Existing random generators in tests produce wide ranges of values that can be out of supported ranges for:
    - `DateType`, the valid range is `[0001-01-01, 9999-12-31]`
    - `TimestampType` supports values in `[0001-01-01T00:00:00.000000Z, 9999-12-31T23:59:59.999999Z]`
    - `CalendarIntervalType` should define intervals for the ranges above.
    
    Dates and timestamps produced by random literal generators are usually out of valid ranges for those types. And tests just check invalid values or values caused by arithmetic overflow.
    
    In the PR, I propose to restrict tested pseudo-random values by valid ranges of `DateType`, `TimestampType` and `CalendarIntervalType`. This should allow to check valid values in test, and avoid wasting time on a priori invalid inputs.
    
    ## How was this patch tested?
    
    The changes were checked by `DateExpressionsSuite` and modified `DateTimeUtils.dateAddMonths`:
    ```Scala
      def dateAddMonths(days: SQLDate, months: Int): SQLDate = {
        localDateToDays(LocalDate.ofEpochDay(days).plusMonths(months))
      }
    ```
    
    Closes #25166 from MaxGekk/datetime-lit-random-gen.
    
    Authored-by: Maxim Gekk <ma...@gmail.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../expressions/DateExpressionsSuite.scala         | 16 +++++++---
 .../expressions/ExpressionEvalHelper.scala         |  2 +-
 .../catalyst/expressions/LiteralGenerator.scala    | 37 ++++++++++++++++++----
 3 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index 9006dc4..4e8322d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -462,13 +462,19 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkEvaluation(AddMonths(Literal.create(null, DateType), Literal(1)), null)
     checkEvaluation(AddMonths(Literal.create(null, DateType), Literal.create(null, IntegerType)),
       null)
+    // Valid range of DateType is [0001-01-01, 9999-12-31]
+    val maxMonthInterval = 10000 * 12
     checkEvaluation(
-      AddMonths(Literal(Date.valueOf("2015-01-30")), Literal(Int.MinValue)), -938165455)
+      AddMonths(Literal(Date.valueOf("0001-01-01")), Literal(maxMonthInterval)), 2933261)
     checkEvaluation(
-      AddMonths(Literal(Date.valueOf("2016-02-28")), positiveIntLit), 1014213)
-    checkEvaluation(
-      AddMonths(Literal(Date.valueOf("2016-02-28")), negativeIntLit), -980528)
-    checkConsistencyBetweenInterpretedAndCodegen(AddMonths, DateType, IntegerType)
+      AddMonths(Literal(Date.valueOf("9999-12-31")), Literal(-1 * maxMonthInterval)), -719529)
+    // Test evaluation results between Interpreted mode and Codegen mode
+    forAll (
+      LiteralGenerator.randomGen(DateType),
+      LiteralGenerator.monthIntervalLiterGen
+    ) { (l1: Literal, l2: Literal) =>
+      cmpInterpretWithCodegen(EmptyRow, AddMonths(l1, l2))
+    }
   }
 
   test("months_between") {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
index 1c91ada..a2c0ce3 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
@@ -398,7 +398,7 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks with PlanTestBa
     }
   }
 
-  private def cmpInterpretWithCodegen(inputRow: InternalRow, expr: Expression): Unit = {
+  def cmpInterpretWithCodegen(inputRow: InternalRow, expr: Expression): Unit = {
     val interpret = try {
       evaluateWithoutCodegen(expr, inputRow)
     } catch {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala
index be5fdb5..b111797 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala
@@ -18,6 +18,8 @@
 package org.apache.spark.sql.catalyst.expressions
 
 import java.sql.{Date, Timestamp}
+import java.time.{Duration, Instant, LocalDate}
+import java.util.concurrent.TimeUnit
 
 import org.scalacheck.{Arbitrary, Gen}
 
@@ -100,23 +102,44 @@ object LiteralGenerator {
   lazy val booleanLiteralGen: Gen[Literal] =
     for { b <- Arbitrary.arbBool.arbitrary } yield Literal.create(b, BooleanType)
 
-  lazy val dateLiteralGen: Gen[Literal] =
-    for { d <- Arbitrary.arbInt.arbitrary } yield Literal.create(new Date(d), DateType)
+  lazy val dateLiteralGen: Gen[Literal] = {
+    // Valid range for DateType is [0001-01-01, 9999-12-31]
+    val minDay = LocalDate.of(1, 1, 1).toEpochDay
+    val maxDay = LocalDate.of(9999, 12, 31).toEpochDay
+    for { day <- Gen.choose(minDay, maxDay) }
+      yield Literal.create(new Date(day * DateTimeUtils.MILLIS_PER_DAY), DateType)
+  }
 
   lazy val timestampLiteralGen: Gen[Literal] = {
     // Catalyst's Timestamp type stores number of microseconds since epoch in
     // a variable of Long type. To prevent arithmetic overflow of Long on
     // conversion from milliseconds to microseconds, the range of random milliseconds
     // since epoch is restricted here.
-    val maxMillis = Long.MaxValue / DateTimeUtils.MICROS_PER_MILLIS
-    val minMillis = Long.MinValue / DateTimeUtils.MICROS_PER_MILLIS
+    // Valid range for TimestampType is [0001-01-01T00:00:00.000000Z, 9999-12-31T23:59:59.999999Z]
+    val minMillis = Instant.parse("0001-01-01T00:00:00.000000Z").toEpochMilli
+    val maxMillis = Instant.parse("9999-12-31T23:59:59.999999Z").toEpochMilli
     for { millis <- Gen.choose(minMillis, maxMillis) }
       yield Literal.create(new Timestamp(millis), TimestampType)
   }
 
-  lazy val calendarIntervalLiterGen: Gen[Literal] =
-    for { m <- Arbitrary.arbInt.arbitrary; s <- Arbitrary.arbLong.arbitrary}
-      yield Literal.create(new CalendarInterval(m, s), CalendarIntervalType)
+  // Valid range for DateType and TimestampType is [0001-01-01, 9999-12-31]
+  private val maxIntervalInMonths: Int = 10000 * 12
+
+  lazy val monthIntervalLiterGen: Gen[Literal] = {
+    for { months <- Gen.choose(-1 * maxIntervalInMonths, maxIntervalInMonths) }
+      yield Literal.create(months, IntegerType)
+  }
+
+  lazy val calendarIntervalLiterGen: Gen[Literal] = {
+    val maxDurationInSec = Duration.between(
+      Instant.parse("0001-01-01T00:00:00.000000Z"),
+      Instant.parse("9999-12-31T23:59:59.999999Z")).getSeconds
+    val maxMicros = TimeUnit.SECONDS.toMicros(maxDurationInSec)
+    for {
+      months <- Gen.choose(-1 * maxIntervalInMonths, maxIntervalInMonths)
+      micros <- Gen.choose(-1 * maxMicros, maxMicros)
+    } yield Literal.create(new CalendarInterval(months, micros), CalendarIntervalType)
+  }
 
 
   // Sometimes, it would be quite expensive when unlimited value is used,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org