You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2020/04/20 19:21:59 UTC

[arrow] 17/28: ARROW-8360: [C++][Gandiva] Fixes date32 support for date/time functions

This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit cc2b8cc6a50c268439d6a63035960f9a44bcd38d
Author: Yuan Zhou <yu...@intel.com>
AuthorDate: Sat Apr 18 18:25:40 2020 +0530

    ARROW-8360: [C++][Gandiva] Fixes date32 support for date/time functions
    
    Gandiva date/time functions such as extractYear only work with
    millisecond values, so passing a date32 (a day count) to these
    functions produces wrong results.
    
    This patch adds a new function, castDATE_date32, which converts date32
    to date64. Date/time functions should be applied to the result of that
    cast, e.g.: extractYear(castDATE(date32_days)).
    
    Signed-off-by: Yuan Zhou <yu...@intel.com>
    
    Closes #6861 from zhouyuan/wip_gandiva_func_date32 and squashes the following commits:
    
    a3019a303 <Yuan Zhou>  Fixes date32 support for date/time functions
    
    Authored-by: Yuan Zhou <yu...@intel.com>
    Signed-off-by: Praveen <pr...@dremio.com>
---
 cpp/src/gandiva/function_registry_arithmetic.cc |  1 +
 cpp/src/gandiva/precompiled/time.cc             |  5 +-
 cpp/src/gandiva/precompiled/time_test.cc        |  2 +
 cpp/src/gandiva/precompiled/types.h             |  2 +
 cpp/src/gandiva/tests/date_time_test.cc         | 75 +++++++++++++++++++++----
 5 files changed, 72 insertions(+), 13 deletions(-)

diff --git a/cpp/src/gandiva/function_registry_arithmetic.cc b/cpp/src/gandiva/function_registry_arithmetic.cc
index d24020f..dca273e 100644
--- a/cpp/src/gandiva/function_registry_arithmetic.cc
+++ b/cpp/src/gandiva/function_registry_arithmetic.cc
@@ -62,6 +62,7 @@ std::vector<NativeFunction> GetArithmeticFunctionRegistry() {
 
       UNARY_SAFE_NULL_IF_NULL(castDATE, {}, int64, date64),
       UNARY_SAFE_NULL_IF_NULL(castDATE, {}, int32, date32),
+      UNARY_SAFE_NULL_IF_NULL(castDATE, {}, date32, date64),
 
       // add/sub/multiply/divide/mod
       BINARY_SYMMETRIC_FN(add, {}), BINARY_SYMMETRIC_FN(subtract, {}),
diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc
index fa38e13..bdfc189 100644
--- a/cpp/src/gandiva/precompiled/time.cc
+++ b/cpp/src/gandiva/precompiled/time.cc
@@ -37,7 +37,6 @@ extern "C" {
 
 // Expand inner macro for all date types.
 #define DATE_TYPES(INNER) \
-  INNER(date32)           \
   INNER(date64)           \
   INNER(timestamp)
 
@@ -454,9 +453,13 @@ DATE_TRUNC_FUNCTIONS(timestamp)
 
 FORCE_INLINE
 gdv_date64 castDATE_int64(gdv_int64 in) { return in; }
+
 FORCE_INLINE
 gdv_date32 castDATE_int32(gdv_int32 in) { return in; }
 
+// date32 (days) -> date64 (millis); widen first so the product is a gdv_date64.
+FORCE_INLINE
+gdv_date64 castDATE_date32(gdv_date32 days) { return gdv_date64{days} * MILLIS_IN_DAY; }
 static int days_in_month[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
 
 bool IsLastDayOfMonth(const EpochTimePoint& tp) {
diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc
index 9771b7e..bb734d9 100644
--- a/cpp/src/gandiva/precompiled/time_test.cc
+++ b/cpp/src/gandiva/precompiled/time_test.cc
@@ -52,6 +52,8 @@ TEST(TestTime, TestCastDate) {
   EXPECT_EQ(castDATE_utf8(context_ptr, "71-1-1", 6), 31536000000);
   EXPECT_EQ(castDATE_utf8(context_ptr, "71-45-1", 7), 0);
   EXPECT_EQ(castDATE_utf8(context_ptr, "71-12-XX", 8), 0);
+
+  EXPECT_EQ(castDATE_date32(1), 86400000);
 }
 
 TEST(TestTime, TestCastTimestamp) {
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 45bc72b..11529bf 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -169,6 +169,8 @@ gdv_date64 castDATE_utf8(int64_t execution_context, const char* input, gdv_int32
 
 gdv_date64 castDATE_int64(gdv_int64 date);
 
+gdv_date64 castDATE_date32(gdv_date32 date);
+
 gdv_date32 castDATE_int32(gdv_int32 date);
 
 gdv_timestamp castTIMESTAMP_utf8(int64_t execution_context, const char* input,
diff --git a/cpp/src/gandiva/tests/date_time_test.cc b/cpp/src/gandiva/tests/date_time_test.cc
index 79b6109..11371b0 100644
--- a/cpp/src/gandiva/tests/date_time_test.cc
+++ b/cpp/src/gandiva/tests/date_time_test.cc
@@ -19,6 +19,7 @@
 #include <math.h>
 #include <time.h>
 #include "arrow/memory_pool.h"
+#include "gandiva/precompiled/time_constants.h"
 #include "gandiva/projector.h"
 #include "gandiva/tests/test_util.h"
 #include "gandiva/tree_expr_builder.h"
@@ -88,6 +89,26 @@ int64_t MillisSince(time_t base_line, int32_t yy, int32_t mm, int32_t dd, int32_
   return static_cast<int64_t>(ts - base_line) * 1000 + millis;
 }
 
+// Whole days (truncated) from base_line to the given mktime()-interpreted date-time.
+int32_t DaysSince(time_t base_line, int32_t yy, int32_t mm, int32_t dd, int32_t hr,
+                  int32_t min, int32_t sec, int32_t millis) {
+  struct tm given_ts;
+  memset(&given_ts, 0, sizeof(struct tm));
+  given_ts.tm_year = (yy - 1900);
+  given_ts.tm_mon = (mm - 1);
+  given_ts.tm_mday = dd;
+  given_ts.tm_hour = hr;
+  given_ts.tm_min = min;
+  given_ts.tm_sec = sec;
+  time_t ts = mktime(&given_ts);
+  if (ts == static_cast<time_t>(-1)) {
+    ARROW_LOG(FATAL) << "mktime() failed";
+  }
+  // time_t is arithmetic, so subtract; widen first (like MillisSince) to avoid overflow.
+  const int64_t elapsed_ms = static_cast<int64_t>(ts - base_line) * 1000 + millis;
+  return static_cast<int32_t>(elapsed_ms / MILLIS_IN_DAY);
+}
+
 TEST_F(TestProjector, TestIsNull) {
   auto d0 = field("d0", date64());
   auto t0 = field("t0", time32(arrow::TimeUnit::MILLI));
@@ -170,14 +191,16 @@ TEST_F(TestProjector, TestDate32IsNull) {
 
 TEST_F(TestProjector, TestDateTime) {
   auto field0 = field("f0", date64());
+  auto field1 = field("f1", date32());
   auto field2 = field("f2", timestamp(arrow::TimeUnit::MILLI));
-  auto schema = arrow::schema({field0, field2});
+  auto schema = arrow::schema({field0, field1, field2});
 
   // output fields
   auto field_year = field("yy", int64());
   auto field_month = field("mm", int64());
   auto field_day = field("dd", int64());
   auto field_hour = field("hh", int64());
+  auto field_date64 = field("date64", date64());
 
   // extract year and month from date
   auto date2year_expr =
@@ -185,15 +208,30 @@ TEST_F(TestProjector, TestDateTime) {
   auto date2month_expr =
       TreeExprBuilder::MakeExpression("extractMonth", {field0}, field_month);
 
+  // extract year and month from date32, cast to date64 first
+  auto node_f1 = TreeExprBuilder::MakeField(field1);
+  auto date32_to_date64_func =
+      TreeExprBuilder::MakeFunction("castDATE", {node_f1}, date64());
+
+  auto date64_2year_func =
+      TreeExprBuilder::MakeFunction("extractYear", {date32_to_date64_func}, int64());
+  auto date64_2year_expr = TreeExprBuilder::MakeExpression(date64_2year_func, field_year);
+
+  auto date64_2month_func =
+      TreeExprBuilder::MakeFunction("extractMonth", {date32_to_date64_func}, int64());
+  auto date64_2month_expr =
+      TreeExprBuilder::MakeExpression(date64_2month_func, field_month);
+
   // extract month and day from timestamp
   auto ts2month_expr =
       TreeExprBuilder::MakeExpression("extractMonth", {field2}, field_month);
   auto ts2day_expr = TreeExprBuilder::MakeExpression("extractDay", {field2}, field_day);
 
   std::shared_ptr<Projector> projector;
-  auto status = Projector::Make(
-      schema, {date2year_expr, date2month_expr, ts2month_expr, ts2day_expr},
-      TestConfiguration(), &projector);
+  auto status = Projector::Make(schema,
+                                {date2year_expr, date2month_expr, date64_2year_expr,
+                                 date64_2month_expr, ts2month_expr, ts2day_expr},
+                                TestConfiguration(), &projector);
   ASSERT_TRUE(status.ok());
 
   // Create a row-batch with some sample data
@@ -207,6 +245,13 @@ TEST_F(TestProjector, TestDateTime) {
   auto array0 =
       MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), field0_data, validity);
 
+  std::vector<int32_t> field1_data = {DaysSince(epoch, 2000, 1, 1, 5, 0, 0, 0),
+                                      DaysSince(epoch, 1999, 12, 31, 5, 0, 0, 0),
+                                      DaysSince(epoch, 2015, 6, 30, 20, 0, 0, 0),
+                                      DaysSince(epoch, 2015, 7, 1, 20, 0, 0, 0)};
+  auto array1 =
+      MakeArrowTypeArray<arrow::Date32Type, int32_t>(date32(), field1_data, validity);
+
   std::vector<int64_t> field2_data = {MillisSince(epoch, 1999, 12, 31, 5, 0, 0, 0),
                                       MillisSince(epoch, 2000, 1, 2, 5, 0, 0, 0),
                                       MillisSince(epoch, 2015, 7, 1, 1, 0, 0, 0),
@@ -216,16 +261,20 @@ TEST_F(TestProjector, TestDateTime) {
       arrow::timestamp(arrow::TimeUnit::MILLI), field2_data, validity);
 
   // expected output
-  // date 2 year and date 2 month
-  auto exp_yy_from_date = MakeArrowArrayInt64({2000, 1999, 2015, 2015}, validity);
-  auto exp_mm_from_date = MakeArrowArrayInt64({1, 12, 6, 7}, validity);
+  // date 2 year and date 2 month for date64
+  auto exp_yy_from_date64 = MakeArrowArrayInt64({2000, 1999, 2015, 2015}, validity);
+  auto exp_mm_from_date64 = MakeArrowArrayInt64({1, 12, 6, 7}, validity);
+
+  // date 2 year and date 2 month for date32
+  auto exp_yy_from_date32 = MakeArrowArrayInt64({2000, 1999, 2015, 2015}, validity);
+  auto exp_mm_from_date32 = MakeArrowArrayInt64({1, 12, 6, 7}, validity);
 
   // ts 2 month and ts 2 day
   auto exp_mm_from_ts = MakeArrowArrayInt64({12, 1, 7, 6}, validity);
   auto exp_dd_from_ts = MakeArrowArrayInt64({31, 2, 1, 29}, validity);
 
   // prepare input record batch
-  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array2});
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2});
 
   // Evaluate expression
   arrow::ArrayVector outputs;
@@ -233,10 +282,12 @@ TEST_F(TestProjector, TestDateTime) {
   EXPECT_TRUE(status.ok());
 
   // Validate results
-  EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date, outputs.at(0));
-  EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date, outputs.at(1));
-  EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_ts, outputs.at(2));
-  EXPECT_ARROW_ARRAY_EQUALS(exp_dd_from_ts, outputs.at(3));
+  EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date64, outputs.at(0));
+  EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date64, outputs.at(1));
+  EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date32, outputs.at(2));
+  EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date32, outputs.at(3));
+  EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_ts, outputs.at(4));
+  EXPECT_ARROW_ARRAY_EQUALS(exp_dd_from_ts, outputs.at(5));
 }
 
 TEST_F(TestProjector, TestTime) {