You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2020/04/20 06:38:09 UTC

[spark] branch branch-3.0 updated: [SPARK-31385][SQL] Restrict micros rebasing via switch arrays up to 2037 year

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new f367838  [SPARK-31385][SQL] Restrict micros rebasing via switch arrays up to 2037 year
f367838 is described below

commit f367838bb5086a200926158d16bd969358d588ef
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Mon Apr 20 06:35:16 2020 +0000

    [SPARK-31385][SQL] Restrict micros rebasing via switch arrays up to 2037 year
    
    ### What changes were proposed in this pull request?
    1. Generate rebasing arrays for micros up to 2037 in `RebaseDateTimeSuite.generateRebaseJson()`.
    2. Remove 4 time zones from the black list in `generateRebaseJson()`, so that they are no longer skipped.
    3. Re-generate JSON files with rebasing info - `gregorian-julian-rebase-micros.json` and `julian-gregorian-rebase-micros.json`.
    
    ### Why are the changes needed?
    1. `sun.util.calendar.ZoneInfo` resolves DST incorrectly after the year 2037. See https://github.com/AdoptOpenJDK/openjdk-jdk8u/blob/aa318070b27849f1fe00d14684b2a40f7b29bf79/jdk/src/share/classes/sun/util/calendar/ZoneInfo.java#L55-L62 . By restricting the rebase arrays to the year 2037, we follow the behaviour of `ZoneInfo`, which uses the DST of 2037 for all years beyond 2037.
    2. To enable optimization of micros rebasing via switch arrays for the time zones:
        - Asia/Tehran
        - Iran
        - Africa/Casablanca
        - Africa/El_Aaiun
    
    ### Does this PR introduce any user-facing change?
    No
    
    ### How was this patch tested?
    By existing test suites `RebaseDateTimeSuite`, `DateTimeUtilsSuite` and `DateFunctionsSuite`.
    
    Closes #28253 from MaxGekk/fix-4-time-zones-rebasing.
    
    Authored-by: Max Gekk <ma...@gmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit 88d39e5a89cb76286ed8201c4d8b94d8a6199a42)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../main/resources/gregorian-julian-rebase-micros.json   | 16 ++++++++++++++++
 .../main/resources/julian-gregorian-rebase-micros.json   | 16 ++++++++++++++++
 .../spark/sql/catalyst/util/RebaseDateTimeSuite.scala    | 12 +++---------
 3 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/sql/catalyst/src/main/resources/gregorian-julian-rebase-micros.json b/sql/catalyst/src/main/resources/gregorian-julian-rebase-micros.json
index 8d36fbd..e5f9b0c 100644
--- a/sql/catalyst/src/main/resources/gregorian-julian-rebase-micros.json
+++ b/sql/catalyst/src/main/resources/gregorian-julian-rebase-micros.json
@@ -55,6 +55,10 @@
   "switches" : [ -62135604309, -59006369109, -55850695509, -52695021909, -46383588309, -43227914709, -40072241109, -33760807509, -30605133909, -27449460309, -21138026709, -17982353109, -14826679509, -12220077909, -12219991509, -12219905109, -12219818709, -12219732309, -12219645909, -12219559509, -12219473109, -12219386709, -12219300309, -2208988800 ],
   "diffs" : [ -172491, -86091, 309, 86709, 173109, 259509, 345909, 432309, 518709, 605109, 691509, 777909, 864309, 777909, 691509, 605109, 518709, 432309, 345909, 259509, 173109, 86709, 309, 0 ]
 }, {
+  "tz" : "Africa/Casablanca",
+  "switches" : [ -62135594980, -59006359780, -55850686180, -52695012580, -46383578980, -43227905380, -40072231780, -33760798180, -30605124580, -27449450980, -21138017380, -17982343780, -14826670180, -12220068580, -12219982180, -12219895780, -12219809380, -12219722980, -12219636580, -12219550180, -12219463780, -12219377380, -12219290980, -2208988800 ],
+  "diffs" : [ -174620, -88220, -1820, 84580, 170980, 257380, 343780, 430180, 516580, 602980, 689380, 775780, 862180, 775780, 689380, 602980, 516580, 430180, 343780, 257380, 170980, 84580, -1820, 0 ]
+}, {
   "tz" : "Africa/Ceuta",
   "switches" : [ -62135595524, -59006360324, -55850686724, -52695013124, -46383579524, -43227905924, -40072232324, -33760798724, -30605125124, -27449451524, -21138017924, -17982344324, -14826670724, -12220069124, -12219982724, -12219896324, -12219809924, -12219723524, -12219637124, -12219550724, -12219464324, -12219377924, -12219291524, -2208988800 ],
   "diffs" : [ -177676, -91276, -4876, 81524, 167924, 254324, 340724, 427124, 513524, 599924, 686324, 772724, 859124, 772724, 686324, 599924, 513524, 427124, 340724, 254324, 167924, 81524, -4876, 0 ]
@@ -79,6 +83,10 @@
   "switches" : [ -62135597616, -59006362416, -55850688816, -52695015216, -46383581616, -43227908016, -40072234416, -33760800816, -30605127216, -27449453616, -21138020016, -17982346416, -14826672816, -12220071216, -12219984816, -12219898416, -12219812016, -12219725616, -12219639216, -12219552816, -12219466416, -12219380016, -12219293616, -2208988800 ],
   "diffs" : [ -175584, -89184, -2784, 83616, 170016, 256416, 342816, 429216, 515616, 602016, 688416, 774816, 861216, 774816, 688416, 602016, 515616, 429216, 342816, 256416, 170016, 83616, -2784, 0 ]
 }, {
+  "tz" : "Africa/El_Aaiun",
+  "switches" : [ -62135593632, -59006358432, -55850684832, -52695011232, -46383577632, -43227904032, -40072230432, -33760796832, -30605123232, -27449449632, -21138016032, -17982342432, -14826668832, -12220067232, -12219980832, -12219894432, -12219808032, -12219721632, -12219635232, -12219548832, -12219462432, -12219376032, -12219289632, -2208988800 ],
+  "diffs" : [ -175968, -89568, -3168, 83232, 169632, 256032, 342432, 428832, 515232, 601632, 688032, 774432, 860832, 774432, 688032, 601632, 515232, 428832, 342432, 256032, 169632, 83232, -3168, 0 ]
+}, {
   "tz" : "Africa/Freetown",
   "switches" : [ -62135595832, -59006360632, -55850687032, -52695013432, -46383579832, -43227906232, -40072232632, -33760799032, -30605125432, -27449451832, -21138018232, -17982344632, -14826671032, -12220069432, -12219983032, -12219896632, -12219810232, -12219723832, -12219637432, -12219551032, -12219464632, -12219378232, -12219291832, -2208988800 ],
   "diffs" : [ -173768, -87368, -968, 85432, 171832, 258232, 344632, 431032, 517432, 603832, 690232, 776632, 863032, 776632, 690232, 603832, 517432, 431032, 344632, 258232, 171832, 85432, -968, 0 ]
@@ -1251,6 +1259,10 @@
   "switches" : [ -62135607551, -59006372351, -55850698751, -52695025151, -46383591551, -43227917951, -40072244351, -33760810751, -30605137151, -27449463551, -21138029951, -17982356351, -14826682751, -12220081151, -12219994751, -12219908351, -12219821951, -12219735551, -12219649151, -12219562751, -12219476351, -12219389951, -12219303551, -2208988800 ],
   "diffs" : [ -176449, -90049, -3649, 82751, 169151, 255551, 341951, 428351, 514751, 601151, 687551, 773951, 860351, 773951, 687551, 601151, 514751, 428351, 341951, 255551, 169151, 82751, -3649, 0 ]
 }, {
+  "tz" : "Asia/Tehran",
+  "switches" : [ -62135609144, -59006373944, -55850700344, -52695026744, -46383593144, -43227919544, -40072245944, -33760812344, -30605138744, -27449465144, -21138031544, -17982357944, -14826684344, -12220082744, -12219996344, -12219909944, -12219823544, -12219737144, -12219650744, -12219564344, -12219477944, -12219391544, -12219305144, -2208988800 ],
+  "diffs" : [ -173056, -86656, -256, 86144, 172544, 258944, 345344, 431744, 518144, 604544, 690944, 777344, 863744, 777344, 690944, 604544, 518144, 431744, 345344, 258944, 172544, 86144, -256, 0 ]
+}, {
   "tz" : "Asia/Tel_Aviv",
   "switches" : [ -62135605254, -59006370054, -55850696454, -52695022854, -46383589254, -43227915654, -40072242054, -33760808454, -30605134854, -27449461254, -21138027654, -17982354054, -14826680454, -12220078854, -12219992454, -12219906054, -12219819654, -12219733254, -12219646854, -12219560454, -12219474054, -12219387654, -12219301254, -2840149254, -2208988800 ],
   "diffs" : [ -171546, -85146, 1254, 87654, 174054, 260454, 346854, 433254, 519654, 606054, 692454, 778854, 865254, 778854, 692454, 606054, 519654, 433254, 346854, 260454, 174054, 87654, 1254, 1240, 0 ]
@@ -2003,6 +2015,10 @@
   "switches" : [ -62135610112, -59006374912, -55850701312, -52695027712, -46383594112, -43227920512, -40072246912, -33760813312, -30605139712, -27449466112, -21138032512, -17982358912, -14826685312, -12220083712, -12219997312, -12219910912, -12219824512, -12219738112, -12219651712, -12219565312, -12219478912, -12219392512, -12219306112, -2208988800 ],
   "diffs" : [ -173888, -87488, -1088, 85312, 171712, 258112, 344512, 430912, 517312, 603712, 690112, 776512, 862912, 776512, 690112, 603712, 517312, 430912, 344512, 258112, 171712, 85312, -1088, 0 ]
 }, {
+  "tz" : "Iran",
+  "switches" : [ -62135609144, -59006373944, -55850700344, -52695026744, -46383593144, -43227919544, -40072245944, -33760812344, -30605138744, -27449465144, -21138031544, -17982357944, -14826684344, -12220082744, -12219996344, -12219909944, -12219823544, -12219737144, -12219650744, -12219564344, -12219477944, -12219391544, -12219305144, -2208988800 ],
+  "diffs" : [ -173056, -86656, -256, 86144, 172544, 258944, 345344, 431744, 518144, 604544, 690944, 777344, 863744, 777344, 690944, 604544, 518144, 431744, 345344, 258944, 172544, 86144, -256, 0 ]
+}, {
   "tz" : "Israel",
   "switches" : [ -62135605254, -59006370054, -55850696454, -52695022854, -46383589254, -43227915654, -40072242054, -33760808454, -30605134854, -27449461254, -21138027654, -17982354054, -14826680454, -12220078854, -12219992454, -12219906054, -12219819654, -12219733254, -12219646854, -12219560454, -12219474054, -12219387654, -12219301254, -2840149254, -2208988800 ],
   "diffs" : [ -171546, -85146, 1254, 87654, 174054, 260454, 346854, 433254, 519654, 606054, 692454, 778854, 865254, 778854, 692454, 606054, 519654, 433254, 346854, 260454, 174054, 87654, 1254, 1240, 0 ]
diff --git a/sql/catalyst/src/main/resources/julian-gregorian-rebase-micros.json b/sql/catalyst/src/main/resources/julian-gregorian-rebase-micros.json
index b733ac4..eddc16f 100644
--- a/sql/catalyst/src/main/resources/julian-gregorian-rebase-micros.json
+++ b/sql/catalyst/src/main/resources/julian-gregorian-rebase-micros.json
@@ -55,6 +55,10 @@
   "switches" : [ -62135776800, -59006455200, -55850695200, -52694935200, -46383415200, -43227655200, -40071895200, -33760375200, -30604615200, -27448855200, -21137335200, -17981575200, -14825815200, -12219300000, -2208988800 ],
   "diffs" : [ 172491, 86091, -309, -86709, -173109, -259509, -345909, -432309, -518709, -605109, -691509, -777909, -864309, -309, 0 ]
 }, {
+  "tz" : "Africa/Casablanca",
+  "switches" : [ -62135769600, -59006448000, -55850688000, -52694928000, -46383408000, -43227648000, -40071888000, -33760368000, -30604608000, -27448848000, -21137328000, -17981568000, -14825808000, -12219292800, -2208988800 ],
+  "diffs" : [ 174620, 88220, 1820, -84580, -170980, -257380, -343780, -430180, -516580, -602980, -689380, -775780, -862180, 1820, 0 ]
+}, {
   "tz" : "Africa/Ceuta",
   "switches" : [ -62135773200, -59006451600, -55850691600, -52694931600, -46383411600, -43227651600, -40071891600, -33760371600, -30604611600, -27448851600, -21137331600, -17981571600, -14825811600, -12219296400, -2208988800 ],
   "diffs" : [ 177676, 91276, 4876, -81524, -167924, -254324, -340724, -427124, -513524, -599924, -686324, -772724, -859124, 4876, 0 ]
@@ -79,6 +83,10 @@
   "switches" : [ -62135773200, -59006451600, -55850691600, -52694931600, -46383411600, -43227651600, -40071891600, -33760371600, -30604611600, -27448851600, -21137331600, -17981571600, -14825811600, -12219296400, -2208988800 ],
   "diffs" : [ 175584, 89184, 2784, -83616, -170016, -256416, -342816, -429216, -515616, -602016, -688416, -774816, -861216, 2784, 0 ]
 }, {
+  "tz" : "Africa/El_Aaiun",
+  "switches" : [ -62135769600, -59006448000, -55850688000, -52694928000, -46383408000, -43227648000, -40071888000, -33760368000, -30604608000, -27448848000, -21137328000, -17981568000, -14825808000, -12219292800, -2208988800 ],
+  "diffs" : [ 175968, 89568, 3168, -83232, -169632, -256032, -342432, -428832, -515232, -601632, -688032, -774432, -860832, 3168, 0 ]
+}, {
   "tz" : "Africa/Freetown",
   "switches" : [ -62135769600, -59006448000, -55850688000, -52694928000, -46383408000, -43227648000, -40071888000, -33760368000, -30604608000, -27448848000, -21137328000, -17981568000, -14825808000, -12219292800, -2208988800 ],
   "diffs" : [ 173768, 87368, 968, -85432, -171832, -258232, -344632, -431032, -517432, -603832, -690232, -776632, -863032, 968, 0 ]
@@ -1251,6 +1259,10 @@
   "switches" : [ -62135784000, -59006462400, -55850702400, -52694942400, -46383422400, -43227662400, -40071902400, -33760382400, -30604622400, -27448862400, -21137342400, -17981582400, -14825822400, -12219307200, -2208988800 ],
   "diffs" : [ 176449, 90049, 3649, -82751, -169151, -255551, -341951, -428351, -514751, -601151, -687551, -773951, -860351, 3649, 0 ]
 }, {
+  "tz" : "Asia/Tehran",
+  "switches" : [ -62135782200, -59006460600, -55850700600, -52694940600, -46383420600, -43227660600, -40071900600, -33760380600, -30604620600, -27448860600, -21137340600, -17981580600, -14825820600, -12219305400, -2208988800 ],
+  "diffs" : [ 173056, 86656, 256, -86144, -172544, -258944, -345344, -431744, -518144, -604544, -690944, -777344, -863744, 256, 0 ]
+}, {
   "tz" : "Asia/Tel_Aviv",
   "switches" : [ -62135776800, -59006455200, -55850695200, -52694935200, -46383415200, -43227655200, -40071895200, -33760375200, -30604615200, -27448855200, -21137335200, -17981575200, -14825815200, -12219300000, -2840148014, -2208988800 ],
   "diffs" : [ 171546, 85146, -1254, -87654, -174054, -260454, -346854, -433254, -519654, -606054, -692454, -778854, -865254, -1254, -1240, 0 ]
@@ -2003,6 +2015,10 @@
   "switches" : [ -62135784000, -59006462400, -55850702400, -52694942400, -46383422400, -43227662400, -40071902400, -33760382400, -30604622400, -27448862400, -21137342400, -17981582400, -14825822400, -12219307200, -2208988800 ],
   "diffs" : [ 173888, 87488, 1088, -85312, -171712, -258112, -344512, -430912, -517312, -603712, -690112, -776512, -862912, 1088, 0 ]
 }, {
+  "tz" : "Iran",
+  "switches" : [ -62135782200, -59006460600, -55850700600, -52694940600, -46383420600, -43227660600, -40071900600, -33760380600, -30604620600, -27448860600, -21137340600, -17981580600, -14825820600, -12219305400, -2208988800 ],
+  "diffs" : [ 173056, 86656, 256, -86144, -172544, -258944, -345344, -431744, -518144, -604544, -690944, -777344, -863744, 256, 0 ]
+}, {
   "tz" : "Israel",
   "switches" : [ -62135776800, -59006455200, -55850695200, -52694935200, -46383415200, -43227655200, -40071895200, -33760375200, -30604615200, -27448855200, -21137335200, -17981575200, -14825815200, -12219300000, -2840148014, -2208988800 ],
   "diffs" : [ 171546, 85146, -1254, -87654, -174054, -260454, -346854, -433254, -519654, -606054, -692454, -778854, -865254, -1254, -1240, 0 ]
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala
index 2f957ef..0111fa0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RebaseDateTimeSuite.scala
@@ -256,22 +256,16 @@ class RebaseDateTimeSuite extends SparkFunSuite with Matchers with SQLHelper {
     case class RebaseRecord(tz: String, switches: Array[Long], diffs: Array[Long])
 
     val result = new ArrayBuffer[RebaseRecord]()
-    // The time zones are excluded because:
-    // 1. Julian to Gregorian rebasing doesn't match to the opposite rebasing from
-    //    Gregorian to Julian rebasing.
-    // 2. Linear searching over switch points might be slow.
-    // 3. Results after the end time point 2100-01-01 are wrong.
-    // See SPARK-31385
-    val blacklist = Set("Asia/Tehran", "Iran", "Africa/Casablanca", "Africa/El_Aaiun")
     ALL_TIMEZONES
-      .filterNot(zid => blacklist.contains(zid.getId))
       .sortBy(_.getId)
       .foreach { zid =>
       withDefaultTimeZone(zid) {
         val start = adjustFunc(instantToMicros(LocalDateTime.of(1, 1, 1, 0, 0, 0)
           .atZone(zid)
           .toInstant))
-        val end = adjustFunc(instantToMicros(LocalDateTime.of(2100, 1, 1, 0, 0, 0)
+        // sun.util.calendar.ZoneInfo resolves DST after 2037 year incorrectly.
+        // See https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8073446
+        val end = adjustFunc(instantToMicros(LocalDateTime.of(2037, 1, 1, 0, 0, 0)
           .atZone(zid)
           .toInstant))
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org