You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ra...@apache.org on 2022/04/19 14:56:31 UTC

[arrow] branch master updated: ARROW-14631: [C++][Gandiva] Implement Nextday Function

This is an automated email from the ASF dual-hosted git repository.

ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 3715e46f14 ARROW-14631: [C++][Gandiva] Implement Nextday Function
3715e46f14 is described below

commit 3715e46f145400af3a394aacf4dfa6e1b1a651a9
Author: Vinicius Roque <ho...@gmail.com>
AuthorDate: Tue Apr 19 20:26:14 2022 +0530

    ARROW-14631: [C++][Gandiva] Implement Nextday Function
    
    Closes #12285 from ViniciusSouzaRoque/feature/add-nextday-function
    
    Lead-authored-by: Vinicius Roque <ho...@gmail.com>
    Co-authored-by: ViniciusSouzaRoque <vi...@dremio.com>
    Signed-off-by: Pindikura Ravindra <ra...@dremio.com>
---
 cpp/src/gandiva/function_registry_common.h    |  6 +++++
 cpp/src/gandiva/function_registry_datetime.cc |  4 +++
 cpp/src/gandiva/precompiled/time.cc           | 38 +++++++++++++++++++++++++++
 cpp/src/gandiva/precompiled/time_test.cc      | 36 +++++++++++++++++++++++++
 cpp/src/gandiva/precompiled/types.h           |  3 +++
 cpp/src/gandiva/tests/projector_test.cc       | 37 ++++++++++++++++++++++++++
 6 files changed, 124 insertions(+)

diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h
index 5ce21125ab..9ec9be1b54 100644
--- a/cpp/src/gandiva/function_registry_common.h
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -185,6 +185,12 @@ typedef std::unordered_map<const FunctionSignature*, const NativeFunction*, KeyH
   NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{TYPE()}, \
                  date64(), kResultNullIfNull, ARROW_STRINGIFY(NAME##_from_##TYPE))
 
+#define NEXT_DAY_SAFE_NULL_IF_NULL(NAME, ALIASES, TYPE)                       \
+  NativeFunction(#NAME, std::vector<std::string> ALIASES,                     \
+                 DataTypeVector{TYPE(), utf8()}, date64(), kResultNullIfNull, \
+                 ARROW_STRINGIFY(NAME##_from_##TYPE),                         \
+                 NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors)
+
 // Hash32 functions that :
 // - NULL handling is of type NULL_NEVER
 //
diff --git a/cpp/src/gandiva/function_registry_datetime.cc b/cpp/src/gandiva/function_registry_datetime.cc
index 816070e1f9..a890c6458d 100644
--- a/cpp/src/gandiva/function_registry_datetime.cc
+++ b/cpp/src/gandiva/function_registry_datetime.cc
@@ -45,6 +45,8 @@ namespace gandiva {
       TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Minute, {"minute"}), \
       TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Second, {"second"})
 
+#define NEXT_DAY_FNS(name) DATE_TYPES(NEXT_DAY_SAFE_NULL_IF_NULL, name, {})
+
 std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
   static std::vector<NativeFunction> date_time_fn_registry_ = {
       UNARY_SAFE_NULL_NEVER_BOOL(isnull, {}, day_time_interval),
@@ -58,6 +60,8 @@ std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
 
       TIME_EXTRACTION_FNS(extract),
 
+      NEXT_DAY_FNS(next_day),
+
       NativeFunction("castDATE", {}, DataTypeVector{utf8()}, date64(), kResultNullIfNull,
                      "castDATE_utf8",
                      NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc
index 1f5e76d03b..9c307a857a 100644
--- a/cpp/src/gandiva/precompiled/time.cc
+++ b/cpp/src/gandiva/precompiled/time.cc
@@ -241,6 +241,44 @@ int getJanWeekOfYear(const EpochTimePoint& tp) {
   return 52;
 }
 
+static const char* WEEK[] = {"SUNDAY",   "MONDAY", "TUESDAY", "WEDNESDAY",
+                             "THURSDAY", "FRIDAY", "SATURDAY"};
+
+static const int WEEK_LEN[] = {6, 6, 7, 9, 8, 6, 8};
+
+#define NEXT_DAY_FUNC(TYPE)                                                              \
+  FORCE_INLINE                                                                           \
+  gdv_date64 next_day_from_##TYPE(gdv_int64 context, gdv_##TYPE millis, const char* in,  \
+                                  int32_t in_len) {                                      \
+    EpochTimePoint tp(millis);                                                           \
+    const auto& dayWithoutHoursAndSec = tp.ClearTimeOfDay();                             \
+    const auto& presentDate = extractDow_timestamp(tp.MillisSinceEpoch());               \
+                                                                                         \
+    int dateSearch = 0;                                                                  \
+    for (int n = 0; n < 7; n++) {                                                        \
+      if (is_substr_utf8_utf8(WEEK[n], WEEK_LEN[n], in, in_len)) {                       \
+        dateSearch = n + 1;                                                              \
+        break;                                                                           \
+      }                                                                                  \
+    }                                                                                    \
+    if (dateSearch == 0) {                                                               \
+      gdv_fn_context_set_error_msg(context, "The weekday in this entry is invalid");     \
+      return 0;                                                                          \
+    }                                                                                    \
+                                                                                         \
+    int64_t distanceDay = dateSearch - presentDate;                                      \
+    if (distanceDay <= 0) {                                                              \
+      distanceDay = 7 + distanceDay;                                                     \
+    }                                                                                    \
+                                                                                         \
+    int64_t nextDate =                                                                   \
+        date_add_int64_timestamp(distanceDay, dayWithoutHoursAndSec.MillisSinceEpoch()); \
+                                                                                         \
+    return nextDate;                                                                     \
+  }
+
+DATE_TYPES(NEXT_DAY_FUNC)
+
 // Dec 29-31
 int getDecWeekOfYear(const EpochTimePoint& tp) {
   int next_jan1_wday = (tp.TmWday() + (31 - tp.TmMday()) + 1) % 7;
diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc
index dcb0f6278e..94c22245c4 100644
--- a/cpp/src/gandiva/precompiled/time_test.cc
+++ b/cpp/src/gandiva/precompiled/time_test.cc
@@ -847,6 +847,42 @@ TEST(TestTime, TestCastTimestampToDate) {
   EXPECT_EQ(StringToTimestamp("2000-05-01 00:00:00"), out);
 }
 
+TEST(TestTime, TestNextDay) {
+  ExecutionContext context;
+  int64_t context_ptr = reinterpret_cast<int64_t>(&context);
+
+  gdv_timestamp ts = StringToTimestamp("2021-11-08 10:20:34");
+  auto out = next_day_from_timestamp(context_ptr, ts, "FR", 2);
+  EXPECT_EQ(StringToTimestamp("2021-11-12 00:00:00"), out);
+
+  out = next_day_from_timestamp(context_ptr, ts, "FRI", 3);
+  EXPECT_EQ(StringToTimestamp("2021-11-12 00:00:00"), out);
+
+  out = next_day_from_timestamp(context_ptr, ts, "FRIDAY", 6);
+  EXPECT_EQ(StringToTimestamp("2021-11-12 00:00:00"), out);
+
+  ts = StringToTimestamp("2015-08-06 11:12:30");
+  out = next_day_from_timestamp(context_ptr, ts, "THU", 3);
+  EXPECT_EQ(StringToTimestamp("2015-08-13 00:00:00"), out);
+
+  ts = StringToTimestamp("2012-08-14 11:12:30");
+  out = next_day_from_timestamp(context_ptr, ts, "TUE", 3);
+  EXPECT_EQ(StringToTimestamp("2012-08-21 00:00:00"), out);
+
+  ts = StringToTimestamp("2012-12-12 12:00:00");
+  out = next_day_from_timestamp(context_ptr, ts, "TU", 2);
+  EXPECT_EQ(StringToTimestamp("2012-12-18 00:00:00"), out);
+
+  ts = StringToTimestamp("2000-01-01 20:15:00");
+  out = next_day_from_timestamp(context_ptr, ts, "SATURDAY", 8);
+  EXPECT_EQ(StringToTimestamp("2000-01-08 00:00:00"), out);
+
+  ts = StringToTimestamp("2015-08-06 11:12:30");
+  out = next_day_from_timestamp(context_ptr, ts, "AHSRK", 5);
+  EXPECT_EQ(context.get_error(), "The weekday in this entry is invalid");
+  context.Reset();
+}
+
 TEST(TestTime, TestCastTimestampToTime) {
   gdv_timestamp ts = StringToTimestamp("2000-05-01 10:20:34");
   auto expected_response =
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index eeb7f2934a..1f1e42deb6 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -426,6 +426,9 @@ gdv_time32 castTIME_int32(int32_t int_val);
 const char* castVARCHAR_timestamp_int64(int64_t, gdv_timestamp, gdv_int64, gdv_int32*);
 gdv_date64 last_day_from_timestamp(gdv_date64 millis);
 
+gdv_date64 next_day_from_timestamp(gdv_int64 context, gdv_date64 millis, const char* in,
+                                   int32_t in_len);
+
 gdv_int64 truncate_int64_int32(gdv_int64 in, gdv_int32 out_scale);
 
 const char* repeat_utf8_int32(gdv_int64 context, const char* in, gdv_int32 in_len,
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index 7a040502f7..98b422d7fa 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -2487,4 +2487,41 @@ TEST_F(TestProjector, TestInstr) {
   EXPECT_ARROW_ARRAY_EQUALS(exp_sum, outputs.at(0));
 }
 
+TEST_F(TestProjector, TestNextDay) {
+  // schema for input fields
+  auto field0 = field("f0", arrow::date64());
+  auto field1 = field("f1", arrow::utf8());
+  auto schema = arrow::schema({field0, field1});
+
+  // output fields
+  auto field_next_day = field("nextday", arrow::date64());
+
+  // Build expression
+  auto next_day_exp =
+      TreeExprBuilder::MakeExpression("next_day", {field0, field1}, field_next_day);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {next_day_exp}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 2;
+  auto array0 = MakeArrowArrayDate64({1636366834000, 1636366834000}, {true, true});
+
+  auto array1 = MakeArrowArrayUtf8({"FRIDAY", "FRI"}, {true, true});
+  // expected output
+  auto exp = MakeArrowArrayDate64({1636675200000, 1636675200000}, {true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
+}
 }  // namespace gandiva