You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2020/04/20 19:21:59 UTC
[arrow] 17/28: ARROW-8360: [C++][Gandiva] Fixes date32 support for
date/time functions
This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
commit cc2b8cc6a50c268439d6a63035960f9a44bcd38d
Author: Yuan Zhou <yu...@intel.com>
AuthorDate: Sat Apr 18 18:25:40 2020 +0530
ARROW-8360: [C++][Gandiva] Fixes date32 support for date/time functions
Gandiva date/time functions such as extractYear only work with
millisecond values; passing date32 (days) to these functions produces
wrong results.
This patch adds a new function, castDATE_date32, that converts date32
to date64. Callers of date/time functions should cast date32 inputs
with castDATE first, e.g.: extractYear(castDATE(date32_days)).
Signed-off-by: Yuan Zhou <yu...@intel.com>
Closes #6861 from zhouyuan/wip_gandiva_func_date32 and squashes the following commits:
a3019a303 <Yuan Zhou> Fixes date32 support for date/time functions
Authored-by: Yuan Zhou <yu...@intel.com>
Signed-off-by: Praveen <pr...@dremio.com>
---
cpp/src/gandiva/function_registry_arithmetic.cc | 1 +
cpp/src/gandiva/precompiled/time.cc | 5 +-
cpp/src/gandiva/precompiled/time_test.cc | 2 +
cpp/src/gandiva/precompiled/types.h | 2 +
cpp/src/gandiva/tests/date_time_test.cc | 75 +++++++++++++++++++++----
5 files changed, 72 insertions(+), 13 deletions(-)
diff --git a/cpp/src/gandiva/function_registry_arithmetic.cc b/cpp/src/gandiva/function_registry_arithmetic.cc
index d24020f..dca273e 100644
--- a/cpp/src/gandiva/function_registry_arithmetic.cc
+++ b/cpp/src/gandiva/function_registry_arithmetic.cc
@@ -62,6 +62,7 @@ std::vector<NativeFunction> GetArithmeticFunctionRegistry() {
UNARY_SAFE_NULL_IF_NULL(castDATE, {}, int64, date64),
UNARY_SAFE_NULL_IF_NULL(castDATE, {}, int32, date32),
+ UNARY_SAFE_NULL_IF_NULL(castDATE, {}, date32, date64),
// add/sub/multiply/divide/mod
BINARY_SYMMETRIC_FN(add, {}), BINARY_SYMMETRIC_FN(subtract, {}),
diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc
index fa38e13..bdfc189 100644
--- a/cpp/src/gandiva/precompiled/time.cc
+++ b/cpp/src/gandiva/precompiled/time.cc
@@ -37,7 +37,6 @@ extern "C" {
// Expand inner macro for all date types.
#define DATE_TYPES(INNER) \
- INNER(date32) \
INNER(date64) \
INNER(timestamp)
@@ -454,9 +453,13 @@ DATE_TRUNC_FUNCTIONS(timestamp)
FORCE_INLINE
gdv_date64 castDATE_int64(gdv_int64 in) { return in; }
+
FORCE_INLINE
gdv_date32 castDATE_int32(gdv_int32 in) { return in; }
+// Converts a date32 value (a count of days) to date64 by scaling it with
+// MILLIS_IN_DAY. The operand is widened to the result type before the
+// multiplication so the product is computed in gdv_date64 rather than in the
+// operand's own (presumably narrower) type, where it could overflow for
+// ordinary dates.
+FORCE_INLINE
+gdv_date64 castDATE_date32(gdv_date32 days) {
+  return static_cast<gdv_date64>(days) * MILLIS_IN_DAY;
+}
+
static int days_in_month[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
bool IsLastDayOfMonth(const EpochTimePoint& tp) {
diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc
index 9771b7e..bb734d9 100644
--- a/cpp/src/gandiva/precompiled/time_test.cc
+++ b/cpp/src/gandiva/precompiled/time_test.cc
@@ -52,6 +52,8 @@ TEST(TestTime, TestCastDate) {
EXPECT_EQ(castDATE_utf8(context_ptr, "71-1-1", 6), 31536000000);
EXPECT_EQ(castDATE_utf8(context_ptr, "71-45-1", 7), 0);
EXPECT_EQ(castDATE_utf8(context_ptr, "71-12-XX", 8), 0);
+
+ EXPECT_EQ(castDATE_date32(1), 86400000);
}
TEST(TestTime, TestCastTimestamp) {
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 45bc72b..11529bf 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -169,6 +169,8 @@ gdv_date64 castDATE_utf8(int64_t execution_context, const char* input, gdv_int32
gdv_date64 castDATE_int64(gdv_int64 date);
+gdv_date64 castDATE_date32(gdv_date32 date);
+
gdv_date32 castDATE_int32(gdv_int32 date);
gdv_timestamp castTIMESTAMP_utf8(int64_t execution_context, const char* input,
diff --git a/cpp/src/gandiva/tests/date_time_test.cc b/cpp/src/gandiva/tests/date_time_test.cc
index 79b6109..11371b0 100644
--- a/cpp/src/gandiva/tests/date_time_test.cc
+++ b/cpp/src/gandiva/tests/date_time_test.cc
@@ -19,6 +19,7 @@
#include <math.h>
#include <time.h>
#include "arrow/memory_pool.h"
+#include "gandiva/precompiled/time_constants.h"
#include "gandiva/projector.h"
#include "gandiva/tests/test_util.h"
#include "gandiva/tree_expr_builder.h"
@@ -88,6 +89,26 @@ int64_t MillisSince(time_t base_line, int32_t yy, int32_t mm, int32_t dd, int32_
return static_cast<int64_t>(ts - base_line) * 1000 + millis;
}
+// Returns the number of whole days elapsed between base_line and the given
+// calendar time. The calendar fields are converted with mktime(), which
+// interprets them as local time; the elapsed milliseconds are then divided by
+// MILLIS_IN_DAY. Logs a fatal error if mktime() cannot represent the time.
+int32_t DaysSince(time_t base_line, int32_t yy, int32_t mm, int32_t dd, int32_t hr,
+                  int32_t min, int32_t sec, int32_t millis) {
+  struct tm given_ts;
+  memset(&given_ts, 0, sizeof(struct tm));
+  given_ts.tm_year = (yy - 1900);  // struct tm counts years from 1900
+  given_ts.tm_mon = (mm - 1);      // struct tm months are zero-based
+  given_ts.tm_mday = dd;
+  given_ts.tm_hour = hr;
+  given_ts.tm_min = min;
+  given_ts.tm_sec = sec;
+
+  time_t ts = mktime(&given_ts);
+  if (ts == static_cast<time_t>(-1)) {
+    ARROW_LOG(FATAL) << "mktime() failed";
+  }
+  // time_t is an arithmetic type on both POSIX and Windows, we can simply
+  // subtract to get a duration in seconds. Widen to int64_t before scaling to
+  // milliseconds (as MillisSince does) so the product is not computed in a
+  // possibly narrower time_t.
+  const int64_t elapsed_millis = static_cast<int64_t>(ts - base_line) * 1000 + millis;
+  // Integer division truncates toward zero, dropping any partial day.
+  return static_cast<int32_t>(elapsed_millis / MILLIS_IN_DAY);
+}
+
TEST_F(TestProjector, TestIsNull) {
auto d0 = field("d0", date64());
auto t0 = field("t0", time32(arrow::TimeUnit::MILLI));
@@ -170,14 +191,16 @@ TEST_F(TestProjector, TestDate32IsNull) {
TEST_F(TestProjector, TestDateTime) {
auto field0 = field("f0", date64());
+ auto field1 = field("f1", date32());
auto field2 = field("f2", timestamp(arrow::TimeUnit::MILLI));
- auto schema = arrow::schema({field0, field2});
+ auto schema = arrow::schema({field0, field1, field2});
// output fields
auto field_year = field("yy", int64());
auto field_month = field("mm", int64());
auto field_day = field("dd", int64());
auto field_hour = field("hh", int64());
+ auto field_date64 = field("date64", date64());
// extract year and month from date
auto date2year_expr =
@@ -185,15 +208,30 @@ TEST_F(TestProjector, TestDateTime) {
auto date2month_expr =
TreeExprBuilder::MakeExpression("extractMonth", {field0}, field_month);
+ // extract year and month from date32, cast to date64 first
+ auto node_f1 = TreeExprBuilder::MakeField(field1);
+ auto date32_to_date64_func =
+ TreeExprBuilder::MakeFunction("castDATE", {node_f1}, date64());
+
+ auto date64_2year_func =
+ TreeExprBuilder::MakeFunction("extractYear", {date32_to_date64_func}, int64());
+ auto date64_2year_expr = TreeExprBuilder::MakeExpression(date64_2year_func, field_year);
+
+ auto date64_2month_func =
+ TreeExprBuilder::MakeFunction("extractMonth", {date32_to_date64_func}, int64());
+ auto date64_2month_expr =
+ TreeExprBuilder::MakeExpression(date64_2month_func, field_month);
+
// extract month and day from timestamp
auto ts2month_expr =
TreeExprBuilder::MakeExpression("extractMonth", {field2}, field_month);
auto ts2day_expr = TreeExprBuilder::MakeExpression("extractDay", {field2}, field_day);
std::shared_ptr<Projector> projector;
- auto status = Projector::Make(
- schema, {date2year_expr, date2month_expr, ts2month_expr, ts2day_expr},
- TestConfiguration(), &projector);
+ auto status = Projector::Make(schema,
+ {date2year_expr, date2month_expr, date64_2year_expr,
+ date64_2month_expr, ts2month_expr, ts2day_expr},
+ TestConfiguration(), &projector);
ASSERT_TRUE(status.ok());
// Create a row-batch with some sample data
@@ -207,6 +245,13 @@ TEST_F(TestProjector, TestDateTime) {
auto array0 =
MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), field0_data, validity);
+ std::vector<int32_t> field1_data = {DaysSince(epoch, 2000, 1, 1, 5, 0, 0, 0),
+ DaysSince(epoch, 1999, 12, 31, 5, 0, 0, 0),
+ DaysSince(epoch, 2015, 6, 30, 20, 0, 0, 0),
+ DaysSince(epoch, 2015, 7, 1, 20, 0, 0, 0)};
+ auto array1 =
+ MakeArrowTypeArray<arrow::Date32Type, int32_t>(date32(), field1_data, validity);
+
std::vector<int64_t> field2_data = {MillisSince(epoch, 1999, 12, 31, 5, 0, 0, 0),
MillisSince(epoch, 2000, 1, 2, 5, 0, 0, 0),
MillisSince(epoch, 2015, 7, 1, 1, 0, 0, 0),
@@ -216,16 +261,20 @@ TEST_F(TestProjector, TestDateTime) {
arrow::timestamp(arrow::TimeUnit::MILLI), field2_data, validity);
// expected output
- // date 2 year and date 2 month
- auto exp_yy_from_date = MakeArrowArrayInt64({2000, 1999, 2015, 2015}, validity);
- auto exp_mm_from_date = MakeArrowArrayInt64({1, 12, 6, 7}, validity);
+ // date 2 year and date 2 month for date64
+ auto exp_yy_from_date64 = MakeArrowArrayInt64({2000, 1999, 2015, 2015}, validity);
+ auto exp_mm_from_date64 = MakeArrowArrayInt64({1, 12, 6, 7}, validity);
+
+ // date 2 year and date 2 month for date32
+ auto exp_yy_from_date32 = MakeArrowArrayInt64({2000, 1999, 2015, 2015}, validity);
+ auto exp_mm_from_date32 = MakeArrowArrayInt64({1, 12, 6, 7}, validity);
// ts 2 month and ts 2 day
auto exp_mm_from_ts = MakeArrowArrayInt64({12, 1, 7, 6}, validity);
auto exp_dd_from_ts = MakeArrowArrayInt64({31, 2, 1, 29}, validity);
// prepare input record batch
- auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array2});
+ auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2});
// Evaluate expression
arrow::ArrayVector outputs;
@@ -233,10 +282,12 @@ TEST_F(TestProjector, TestDateTime) {
EXPECT_TRUE(status.ok());
// Validate results
- EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date, outputs.at(0));
- EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date, outputs.at(1));
- EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_ts, outputs.at(2));
- EXPECT_ARROW_ARRAY_EQUALS(exp_dd_from_ts, outputs.at(3));
+ EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date64, outputs.at(0));
+ EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date64, outputs.at(1));
+ EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date32, outputs.at(2));
+ EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date32, outputs.at(3));
+ EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_ts, outputs.at(4));
+ EXPECT_ARROW_ARRAY_EQUALS(exp_dd_from_ts, outputs.at(5));
}
TEST_F(TestProjector, TestTime) {