You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/12/13 07:00:10 UTC

[GitHub] [arrow] bkmgit commented on a change in pull request #11882: ARROW-9843: [C++] Implement Between ternary kernel

bkmgit commented on a change in pull request #11882:
URL: https://github.com/apache/arrow/pull/11882#discussion_r767453486



##########
File path: cpp/src/arrow/compute/kernels/scalar_compare_test.cc
##########
@@ -1850,5 +1851,154 @@ TEST(TestMaxElementWiseMinElementWise, CommonTemporal) {
               ResultWith(ScalarFromJSON(date64(), "86400000")));
 }
 
+template <typename ArrowType>
+static void ValidateBetween(const Datum& val, const Datum& lhs, const Datum& rhs,
+                            const Datum& expected) {
+  ASSERT_OK_AND_ASSIGN(Datum result, Between(val, lhs, rhs));
+  AssertArraysEqual(*expected.make_array(), *result.make_array(),
+                    /*verbose=*/true);
+}
+
+template <typename ArrowType>
+static void ValidateBetween(const char* value_str, const Datum& lhs, const Datum& rhs,
+                            const char* expected_str) {
+  auto value = ArrayFromJSON(TypeTraits<ArrowType>::type_singleton(), value_str);
+  auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str);
+  ValidateBetween<ArrowType>(value, lhs, rhs, expected);
+}
+
+template <>
+void ValidateBetween<StringType>(const char* value_str, const Datum& lhs,
+                                 const Datum& rhs, const char* expected_str) {
+  auto value = ArrayFromJSON(utf8(), value_str);
+  auto expected = ArrayFromJSON(TypeTraits<BooleanType>::type_singleton(), expected_str);
+  ValidateBetween<StringType>(value, lhs, rhs, expected);
+}
+
+template <typename ArrowType>
+class TestNumericBetweenKernel : public ::testing::Test {};
+
+TYPED_TEST_SUITE(TestNumericBetweenKernel, NumericArrowTypes);
+TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayScalarScalar) {
+  using ScalarType = typename TypeTraits<TypeParam>::ScalarType;
+  using CType = typename TypeTraits<TypeParam>::CType;
+
+  Datum zero(std::make_shared<ScalarType>(CType(0)));
+  Datum four(std::make_shared<ScalarType>(CType(4)));
+  ValidateBetween<TypeParam>("[]", zero, four, "[]");
+  ValidateBetween<TypeParam>("[null]", zero, four, "[null]");
+  ValidateBetween<TypeParam>("[0,0,1,1,2,2]", zero, four, "[0,0,1,1,1,1]");
+  ValidateBetween<TypeParam>("[0,1,2,3,4,5]", zero, four, "[0,1,1,1,0,0]");
+  ValidateBetween<TypeParam>("[5,4,3,2,1,0]", zero, four, "[0,0,1,1,1,0]");
+  ValidateBetween<TypeParam>("[null,0,1,1]", zero, four, "[null,0,1,1]");
+}
+
+TEST(TestSimpleBetweenKernel, SimpleStringTest) {
+  using ScalarType = typename TypeTraits<StringType>::ScalarType;
+  auto l = Datum(std::make_shared<ScalarType>("abc"));
+  auto r = Datum(std::make_shared<ScalarType>("zzz"));
+  ValidateBetween<StringType>("[]", l, r, "[]");
+  ValidateBetween<StringType>("[null]", l, r, "[null]");
+  ValidateBetween<StringType>(R"(["aaa", "aaaa", "ccc", "z"])", l, r,
+                              R"([false, false, true, true])");
+  ValidateBetween<StringType>(R"(["a", "aaaa", "c", "z"])", l, r,
+                              R"([false, false, true, true])");
+  ValidateBetween<StringType>(R"(["a", "aaaa", "fff", "zzzz"])", l, r,
+                              R"([false, false, true, false])");
+  ValidateBetween<StringType>(R"(["abd", null, null, "zzx"])", l, r,
+                              R"([true, null, null, true])");
+}
+
+TEST(TestSimpleBetweenKernel, SimpleTimestampTest) {
+  using ScalarType = typename TypeTraits<TimestampType>::ScalarType;
+  auto checkTimestampArray = [](std::shared_ptr<DataType> type, const char* input_str,
+                                const Datum& lhs, const Datum& rhs,
+                                const char* expected_str) {
+    auto value = ArrayFromJSON(type, input_str);
+    auto expected = ArrayFromJSON(boolean(), expected_str);
+    ValidateBetween<TimestampType>(value, lhs, rhs, expected);
+  };
+  auto unit = TimeUnit::SECOND;
+  auto l = Datum(std::make_shared<ScalarType>(923184000, timestamp(unit)));
+  auto r = Datum(std::make_shared<ScalarType>(1602032602, timestamp(unit)));
+  checkTimestampArray(timestamp(unit), "[]", l, r, "[]");
+  checkTimestampArray(timestamp(unit), "[null]", l, r, "[null]");
+  checkTimestampArray(timestamp(unit), R"(["1970-01-01","2000-02-29","1900-02-28"])", l,
+                      r, "[false,true,false]");
+  checkTimestampArray(timestamp(unit), R"(["1970-01-01","2000-02-29","2004-02-28"])", l,
+                      r, "[false,true,true]");
+  checkTimestampArray(timestamp(unit), R"(["2018-01-01","1999-04-04","1900-02-28"])", l,
+                      r, "[true,false,false]");
+}
+
+TYPED_TEST(TestNumericBetweenKernel, SimpleBetweenArrayArrayArray) {
+  ValidateBetween<TypeParam>(
+      "[]", ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[]"),
+      ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[]"), "[]");
+  ValidateBetween<TypeParam>(
+      "[null]", ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[null]"),
+      ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[null]"), "[null]");
+  ValidateBetween<TypeParam>(
+      "[1,1,2,2,2]",
+      ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[0,0,1,3,3]"),
+      ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[10,10,2,5,5]"),
+      "[true,true,false,false,false]");
+  ValidateBetween<TypeParam>(
+      "[1,1,2,2,2,2]",
+      ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[0,0,1,null,3,3]"),
+      ArrayFromJSON(TypeTraits<TypeParam>::type_singleton(), "[10,10,2,2,5,5]"),
+      "[true,true,false,null,false,false]");
+}
+
+TEST(TestSimpleBetweenKernel, StringArrayArrayArrayTest) {
+  ValidateBetween<StringType>(
+      R"(["david","hello","world"])",
+      ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["adam","hi","whirl"])"),
+      ArrayFromJSON(TypeTraits<StringType>::type_singleton(),
+                    R"(["robert","goeiemoreen","whirlwind"])"),
+      "[true, false, false]");
+  ValidateBetween<StringType>(
+      R"(["x","a","f"])",
+      ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["w","a","e"])"),
+      ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["z","a","g"])"),
+      "[true, false, true]");
+  ValidateBetween<StringType>(
+      R"(["block","bit","binary"])",
+      ArrayFromJSON(TypeTraits<StringType>::type_singleton(),
+                    R"(["bit","nibble","ternary"])"),
+      ArrayFromJSON(TypeTraits<StringType>::type_singleton(), R"(["word","d","xyz"])"),
+      "[true, false, false]");
+  ValidateBetween<StringType>(R"(["Ayumi","アユミ","王梦莹"])",
+                              ArrayFromJSON(TypeTraits<StringType>::type_singleton(),
+                                            R"(["たなか","あゆみ","歩美"])"),
+                              ArrayFromJSON(TypeTraits<StringType>::type_singleton(),
+                                            R"(["李平之","田中","たなか"])"),
+                              "[false, true, false]");

Review comment:
       Yes. It can be removed later if a different implementation with a different ordering is adopted, but non-ASCII characters in UTF-8 strings are likely to occur.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org