You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ra...@apache.org on 2022/04/19 14:56:31 UTC
[arrow] branch master updated: ARROW-14631: [C++][Gandiva] Implement Nextday Function
This is an automated email from the ASF dual-hosted git repository.
ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 3715e46f14 ARROW-14631: [C++][Gandiva] Implement Nextday Function
3715e46f14 is described below
commit 3715e46f145400af3a394aacf4dfa6e1b1a651a9
Author: Vinicius Roque <ho...@gmail.com>
AuthorDate: Tue Apr 19 20:26:14 2022 +0530
ARROW-14631: [C++][Gandiva] Implement Nextday Function
Closes #12285 from ViniciusSouzaRoque/feature/add-nextday-function
Lead-authored-by: Vinicius Roque <ho...@gmail.com>
Co-authored-by: ViniciusSouzaRoque <vi...@dremio.com>
Signed-off-by: Pindikura Ravindra <ra...@dremio.com>
---
cpp/src/gandiva/function_registry_common.h | 6 +++++
cpp/src/gandiva/function_registry_datetime.cc | 4 +++
cpp/src/gandiva/precompiled/time.cc | 38 +++++++++++++++++++++++++++
cpp/src/gandiva/precompiled/time_test.cc | 36 +++++++++++++++++++++++++
cpp/src/gandiva/precompiled/types.h | 3 +++
cpp/src/gandiva/tests/projector_test.cc | 37 ++++++++++++++++++++++++++
6 files changed, 124 insertions(+)
diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h
index 5ce21125ab..9ec9be1b54 100644
--- a/cpp/src/gandiva/function_registry_common.h
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -185,6 +185,12 @@ typedef std::unordered_map<const FunctionSignature*, const NativeFunction*, KeyH
NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{TYPE()}, \
date64(), kResultNullIfNull, ARROW_STRINGIFY(NAME##_from_##TYPE))
+// Next-day functions that :
+// - take two arguments : a value of TYPE and a utf8 string
+// - return date64
+// - NULL handling is of type NULL_IF_NULL
+// - need the execution context and can return errors
+//
+// The pre-compiled fn name is formed as NAME_from_TYPE, e.g. next_day_from_timestamp
+#define NEXT_DAY_SAFE_NULL_IF_NULL(NAME, ALIASES, TYPE) \
+ NativeFunction(#NAME, std::vector<std::string> ALIASES, \
+ DataTypeVector{TYPE(), utf8()}, date64(), kResultNullIfNull, \
+ ARROW_STRINGIFY(NAME##_from_##TYPE), \
+ NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors)
+
// Hash32 functions that :
// - NULL handling is of type NULL_NEVER
//
diff --git a/cpp/src/gandiva/function_registry_datetime.cc b/cpp/src/gandiva/function_registry_datetime.cc
index 816070e1f9..a890c6458d 100644
--- a/cpp/src/gandiva/function_registry_datetime.cc
+++ b/cpp/src/gandiva/function_registry_datetime.cc
@@ -45,6 +45,8 @@ namespace gandiva {
TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Minute, {"minute"}), \
TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Second, {"second"})
+// Registry entries for a next-day function called `name`, with no aliases.
+// The entries are produced by passing NEXT_DAY_SAFE_NULL_IF_NULL through DATE_TYPES
+// (presumably one entry per date-like type; confirm against the DATE_TYPES definition).
+#define NEXT_DAY_FNS(name) DATE_TYPES(NEXT_DAY_SAFE_NULL_IF_NULL, name, {})
+
std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
static std::vector<NativeFunction> date_time_fn_registry_ = {
UNARY_SAFE_NULL_NEVER_BOOL(isnull, {}, day_time_interval),
@@ -58,6 +60,8 @@ std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
TIME_EXTRACTION_FNS(extract),
+ NEXT_DAY_FNS(next_day),
+
NativeFunction("castDATE", {}, DataTypeVector{utf8()}, date64(), kResultNullIfNull,
"castDATE_utf8",
NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc
index 1f5e76d03b..9c307a857a 100644
--- a/cpp/src/gandiva/precompiled/time.cc
+++ b/cpp/src/gandiva/precompiled/time.cc
@@ -241,6 +241,44 @@ int getJanWeekOfYear(const EpochTimePoint& tp) {
return 52;
}
+// Upper-case weekday names accepted by next_day, starting with SUNDAY. The
+// position in this table (plus one) is the day-of-week number used for the lookup.
+static const char* WEEK[] = {
+    "SUNDAY", "MONDAY", "TUESDAY", "WEDNESDAY", "THURSDAY", "FRIDAY", "SATURDAY",
+};
+
+// WEEK_LEN[i] is the number of characters in WEEK[i]; keep both tables in sync.
+static const int WEEK_LEN[] = {
+    6,  // SUNDAY
+    6,  // MONDAY
+    7,  // TUESDAY
+    9,  // WEDNESDAY
+    8,  // THURSDAY
+    6,  // FRIDAY
+    8,  // SATURDAY
+};
+
+// Generates next_day_from_<TYPE>(context, millis, in, in_len).
+//
+// The generated function clears the time of day of `millis` and returns the first
+// date strictly after it that falls on the weekday named by `in`; asking for the
+// weekday `millis` already falls on yields the date one week later.
+//
+// `in` (`in_len` bytes) must be a non-empty prefix of one of the upper-case names
+// in WEEK, e.g. "FR", "FRI" or "FRIDAY". When several names share the prefix, the
+// earliest entry in WEEK wins ("T" resolves to TUESDAY, "S" to SUNDAY).
+//
+// Any other text, including an empty string or a fragment such as "DAY" that only
+// occurs inside a weekday name, sets the error "The weekday in this entry is
+// invalid" on `context` and returns 0.
+//
+// NOTE(review): the arithmetic assumes extractDow_timestamp numbers the days from 1
+// in the same order as WEEK (SUNDAY == 1) - confirm against its definition.
+//
+// Only block comments are used inside the macro body, because a line comment would
+// be joined with the following line by the trailing backslash.
+#define NEXT_DAY_FUNC(TYPE) \
+  FORCE_INLINE \
+  gdv_date64 next_day_from_##TYPE(gdv_int64 context, gdv_##TYPE millis, const char* in, \
+                                  int32_t in_len) { \
+    EpochTimePoint tp(millis); \
+    const auto& dayWithoutHoursAndSec = tp.ClearTimeOfDay(); \
+    const auto& presentDate = extractDow_timestamp(tp.MillisSinceEpoch()); \
+    \
+    /* dateSearch is the 1-based position in WEEK; 0 means nothing matched. */ \
+    int dateSearch = 0; \
+    for (int n = 0; n < 7 && dateSearch == 0; n++) { \
+      /* An empty input, or one longer than the name, cannot be a prefix of it. */ \
+      if (in_len <= 0 || in_len > WEEK_LEN[n]) { \
+        continue; \
+      } \
+      int32_t matched = 0; \
+      while (matched < in_len && in[matched] == WEEK[n][matched]) { \
+        matched++; \
+      } \
+      if (matched == in_len) { \
+        dateSearch = n + 1; \
+      } \
+    } \
+    if (dateSearch == 0) { \
+      gdv_fn_context_set_error_msg(context, "The weekday in this entry is invalid"); \
+      return 0; \
+    } \
+    \
+    /* Days to move forward, always in the range 1..7. */ \
+    int64_t distanceDay = dateSearch - presentDate; \
+    if (distanceDay <= 0) { \
+      distanceDay = 7 + distanceDay; \
+    } \
+    \
+    int64_t nextDate = \
+        date_add_int64_timestamp(distanceDay, dayWithoutHoursAndSec.MillisSinceEpoch()); \
+    \
+    return nextDate; \
+  }
+
+// Instantiate next_day_from_<TYPE> for each type enumerated by DATE_TYPES.
+DATE_TYPES(NEXT_DAY_FUNC)
+
// Dec 29-31
int getDecWeekOfYear(const EpochTimePoint& tp) {
int next_jan1_wday = (tp.TmWday() + (31 - tp.TmMday()) + 1) % 7;
diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc
index dcb0f6278e..94c22245c4 100644
--- a/cpp/src/gandiva/precompiled/time_test.cc
+++ b/cpp/src/gandiva/precompiled/time_test.cc
@@ -847,6 +847,42 @@ TEST(TestTime, TestCastTimestampToDate) {
EXPECT_EQ(StringToTimestamp("2000-05-01 00:00:00"), out);
}
+// Checks next_day_from_timestamp for abbreviated and full weekday names, and for
+// the error reported when the weekday text is not recognised.
+TEST(TestTime, TestNextDay) {
+  ExecutionContext context;
+  const int64_t context_ptr = reinterpret_cast<int64_t>(&context);
+
+  // Parses `when` and asks for the next `weekday` (given with its byte length).
+  auto next_day = [&](const char* when, const char* weekday, int32_t weekday_len) {
+    gdv_timestamp start = StringToTimestamp(when);
+    return next_day_from_timestamp(context_ptr, start, weekday, weekday_len);
+  };
+
+  // Two-letter, three-letter and full spellings all resolve to the same day.
+  EXPECT_EQ(StringToTimestamp("2021-11-12 00:00:00"),
+            next_day("2021-11-08 10:20:34", "FR", 2));
+  EXPECT_EQ(StringToTimestamp("2021-11-12 00:00:00"),
+            next_day("2021-11-08 10:20:34", "FRI", 3));
+  EXPECT_EQ(StringToTimestamp("2021-11-12 00:00:00"),
+            next_day("2021-11-08 10:20:34", "FRIDAY", 6));
+
+  // The expected dates below are exactly one week after the input.
+  EXPECT_EQ(StringToTimestamp("2015-08-13 00:00:00"),
+            next_day("2015-08-06 11:12:30", "THU", 3));
+  EXPECT_EQ(StringToTimestamp("2012-08-21 00:00:00"),
+            next_day("2012-08-14 11:12:30", "TUE", 3));
+
+  EXPECT_EQ(StringToTimestamp("2012-12-18 00:00:00"),
+            next_day("2012-12-12 12:00:00", "TU", 2));
+
+  EXPECT_EQ(StringToTimestamp("2000-01-08 00:00:00"),
+            next_day("2000-01-01 20:15:00", "SATURDAY", 8));
+
+  // Text that names no weekday is reported through the execution context.
+  next_day("2015-08-06 11:12:30", "AHSRK", 5);
+  EXPECT_EQ(context.get_error(), "The weekday in this entry is invalid");
+  context.Reset();
+}
+
+
TEST(TestTime, TestCastTimestampToTime) {
gdv_timestamp ts = StringToTimestamp("2000-05-01 10:20:34");
auto expected_response =
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index eeb7f2934a..1f1e42deb6 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -426,6 +426,9 @@ gdv_time32 castTIME_int32(int32_t int_val);
const char* castVARCHAR_timestamp_int64(int64_t, gdv_timestamp, gdv_int64, gdv_int32*);
gdv_date64 last_day_from_timestamp(gdv_date64 millis);
+// Returns the date (time of day cleared) of the next occurrence of the weekday
+// named by the `in_len` bytes at `in`, counted from `millis`. An unrecognised
+// weekday sets an error on `context` and returns 0.
+gdv_date64 next_day_from_timestamp(gdv_int64 context, gdv_timestamp millis,
+                                   const char* in, int32_t in_len);
+
gdv_int64 truncate_int64_int32(gdv_int64 in, gdv_int32 out_scale);
const char* repeat_utf8_int32(gdv_int64 context, const char* in, gdv_int32 in_len,
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index 7a040502f7..98b422d7fa 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -2487,4 +2487,41 @@ TEST_F(TestProjector, TestInstr) {
EXPECT_ARROW_ARRAY_EQUALS(exp_sum, outputs.at(0));
}
+// End-to-end check that the "next_day" function is registered for
+// (date64, utf8) -> date64 and evaluates through a Projector.
+TEST_F(TestProjector, TestNextDay) {
+  // schema for input fields
+  auto field0 = field("f0", arrow::date64());
+  auto field1 = field("f1", arrow::utf8());
+  auto schema = arrow::schema({field0, field1});
+
+  // output fields
+  auto field_next_day = field("nextday", arrow::date64());
+
+  // Build expression
+  auto next_day_exp =
+      TreeExprBuilder::MakeExpression("next_day", {field0, field1}, field_next_day);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {next_day_exp}, TestConfiguration(), &projector);
+  // Fatal on failure: the projector is dereferenced below.
+  ASSERT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 2;
+  // 1636366834000 ms since epoch is 2021-11-08 10:20:34 UTC.
+  auto array0 = MakeArrowArrayDate64({1636366834000, 1636366834000}, {true, true});
+
+  auto array1 = MakeArrowArrayUtf8({"FRIDAY", "FRI"}, {true, true});
+  // expected output: 1636675200000 ms since epoch is 2021-11-12 00:00:00 UTC.
+  auto exp = MakeArrowArrayDate64({1636675200000, 1636675200000}, {true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  // Fatal on failure: outputs.at(0) is only meaningful after a successful Evaluate.
+  ASSERT_TRUE(status.ok());
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
+}
} // namespace gandiva