You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ra...@apache.org on 2019/04/18 11:59:14 UTC

[arrow] branch master updated: ARROW-5163 : [Gandiva] Cast timestamp/date are incorrectly evaluating year 0097 to 1997

This is an automated email from the ASF dual-hosted git repository.

ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 70813d7  ARROW-5163 : [Gandiva] Cast timestamp/date are incorrectly evaluating year 0097 to 1997
70813d7 is described below

commit 70813d7584987c5de890ba0d34ff040eda4f0702
Author: shyam <sh...@dremio.com>
AuthorDate: Thu Apr 18 17:28:52 2019 +0530

    ARROW-5163 : [Gandiva] Cast timestamp/date are incorrectly evaluating year 0097 to 1997
    
    Consider the length of the year string before adjusting the year. A year string with four or more digits, such as "0097", should be taken literally as year 0097, while a shorter year string, such as "97", should still be interpreted as 1997.
    
    Author: shyam <sh...@dremio.com>
    
    Closes #4146 from shyambits2004/cast_time_date and squashes the following commits:
    
    fee4a08f <shyam> ARROW-5163 :  Cast timestamp/date are incorrectly evaluating year 0097 to 1997
---
 cpp/src/gandiva/precompiled/time.cc      | 12 ++++++++++--
 cpp/src/gandiva/precompiled/time_test.cc | 32 +++++++++++++++++++++++++++-----
 2 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc
index 22c7cbf..77aca3f 100644
--- a/cpp/src/gandiva/precompiled/time.cc
+++ b/cpp/src/gandiva/precompiled/time.cc
@@ -528,12 +528,16 @@ date64 castDATE_utf8(int64_t context, const char* input, int32 length) {
   // format : 0 is year, 1 is month and 2 is day.
   int dateFields[3];
   int dateIndex = 0, index = 0, value = 0;
+  int year_str_len = 0;
   while (dateIndex < 3 && index < length) {
     if (!isdigit(input[index])) {
       dateFields[dateIndex++] = value;
       value = 0;
     } else {
       value = (value * 10) + (input[index] - '0');
+      if (dateIndex == TimeFields::kYear) {
+        year_str_len++;
+      }
     }
     index++;
   }
@@ -553,7 +557,7 @@ date64 castDATE_utf8(int64_t context, const char* input, int32 length) {
    * If range of two digits is between 70 - 99 then year = 1970 - 1999
    * Else if two digits is between 00 - 69 = 2000 - 2069
    */
-  if (dateFields[TimeFields::kYear] < 100) {
+  if (dateFields[TimeFields::kYear] < 100 && year_str_len < 4) {
     if (dateFields[TimeFields::kYear] < 70) {
       dateFields[TimeFields::kYear] += 2000;
     } else {
@@ -593,10 +597,14 @@ timestamp castTIMESTAMP_utf8(int64_t context, const char* input, int32 length) {
   int ts_fields[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
   boolean add_displacement = true;
   boolean encountered_zone = false;
+  int year_str_len = 0;
   int ts_field_index = TimeFields::kYear, index = 0, value = 0;
   while (ts_field_index < TimeFields::kMax && index < length) {
     if (isdigit(input[index])) {
       value = (value * 10) + (input[index] - '0');
+      if (ts_field_index == TimeFields::kYear) {
+        year_str_len++;
+      }
     } else {
       ts_fields[ts_field_index] = value;
       value = 0;
@@ -634,7 +642,7 @@ timestamp castTIMESTAMP_utf8(int64_t context, const char* input, int32 length) {
   }
 
   // adjust the year
-  if (ts_fields[TimeFields::kYear] < 100) {
+  if (ts_fields[TimeFields::kYear] < 100 && year_str_len < 4) {
     if (ts_fields[TimeFields::kYear] < 70) {
       ts_fields[TimeFields::kYear] += 2000;
     } else {
diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc
index 6f337d6..aaadf0f 100644
--- a/cpp/src/gandiva/precompiled/time_test.cc
+++ b/cpp/src/gandiva/precompiled/time_test.cc
@@ -28,7 +28,17 @@ TEST(TestTime, TestCastDate) {
   int64_t context_ptr = reinterpret_cast<int64_t>(&context);
 
   EXPECT_EQ(castDATE_utf8(context_ptr, "1967-12-1", 9), -65836800000);
+  EXPECT_EQ(castDATE_utf8(context_ptr, "2067-12-1", 9), 3089923200000);
+
+  EXPECT_EQ(castDATE_utf8(context_ptr, "7-12-1", 6), 1196467200000);
+  EXPECT_EQ(castDATE_utf8(context_ptr, "67-12-1", 7), 3089923200000);
+  EXPECT_EQ(castDATE_utf8(context_ptr, "067-12-1", 8), 3089923200000);
+  EXPECT_EQ(castDATE_utf8(context_ptr, "0067-12-1", 9), -60023980800000);
+  EXPECT_EQ(castDATE_utf8(context_ptr, "00067-12-1", 10), -60023980800000);
+  EXPECT_EQ(castDATE_utf8(context_ptr, "167-12-1", 8), -56868307200000);
+
   EXPECT_EQ(castDATE_utf8(context_ptr, "1972-12-1", 9), 92016000000);
+  EXPECT_EQ(castDATE_utf8(context_ptr, "72-12-1", 7), 92016000000);
 
   EXPECT_EQ(castDATE_utf8(context_ptr, "1972222222", 10), 0);
   EXPECT_EQ(context.get_error(), "Not a valid date value 1972222222");
@@ -38,8 +48,8 @@ TEST(TestTime, TestCastDate) {
   EXPECT_EQ(castDATE_utf8(context_ptr, "1967-12-1bb", 11), -65836800000);
 
   EXPECT_EQ(castDATE_utf8(context_ptr, "67-12-1", 7), 3089923200000);
-  EXPECT_EQ(castDATE_utf8(context_ptr, "67-1-1", 7), 3061065600000);
-  EXPECT_EQ(castDATE_utf8(context_ptr, "71-1-1", 7), 31536000000);
+  EXPECT_EQ(castDATE_utf8(context_ptr, "67-1-1", 6), 3061065600000);
+  EXPECT_EQ(castDATE_utf8(context_ptr, "71-1-1", 6), 31536000000);
   EXPECT_EQ(castDATE_utf8(context_ptr, "71-45-1", 7), 0);
   EXPECT_EQ(castDATE_utf8(context_ptr, "71-12-XX", 8), 0);
 }
@@ -49,10 +59,22 @@ TEST(TestTime, TestCastTimestamp) {
   int64_t context_ptr = reinterpret_cast<int64_t>(&context);
 
   EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1967-12-1", 9), -65836800000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2067-12-1", 9), 3089923200000);
+
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "7-12-1", 6), 1196467200000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "67-12-1", 7), 3089923200000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "067-12-1", 8), 3089923200000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "0067-12-1", 9), -60023980800000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "00067-12-1", 10), -60023980800000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "167-12-1", 8), -56868307200000);
+
   EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1972-12-1", 9), 92016000000);
-  EXPECT_EQ(castDATE_utf8(context_ptr, "67-12-1", 7), 3089923200000);
-  EXPECT_EQ(castDATE_utf8(context_ptr, "67-1-1", 7), 3061065600000);
-  EXPECT_EQ(castDATE_utf8(context_ptr, "71-1-1", 7), 31536000000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "72-12-1", 7), 92016000000);
+
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "1972-12-1", 9), 92016000000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "67-12-1", 7), 3089923200000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "67-1-1", 6), 3061065600000);
+  EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "71-1-1", 6), 31536000000);
 
   EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30", 18), 969702330000);
   EXPECT_EQ(castTIMESTAMP_utf8(context_ptr, "2000-09-23 9:45:30.920", 22), 969702330920);