You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/07/06 15:50:17 UTC

[GitHub] [arrow] pitrou commented on a change in pull request #7519: ARROW-9017: [C++][Python] Refactor scalar bindings

pitrou commented on a change in pull request #7519:
URL: https://github.com/apache/arrow/pull/7519#discussion_r450314430



##########
File path: cpp/src/arrow/scalar_test.cc
##########
@@ -451,6 +565,127 @@ TEST(TestStructScalar, FieldAccess) {
 
   ASSERT_RAISES(Invalid, abc.field(5).status());
   ASSERT_RAISES(Invalid, abc.field("c").status());
+
+  ASSERT_OK_AND_ASSIGN(auto d, abc.field("d"));
+  ASSERT_TRUE(d->Equals(MakeNullScalar(int64())));
+}
+
+TEST(TestDictionaryScalar, Basics) {
+  auto ty = dictionary(int8(), utf8());
+  auto dict = ArrayFromJSON(utf8(), "[\"alpha\", \"beta\", \"gamma\"]");
+
+  DictionaryScalar::ValueType alpha;
+  ASSERT_OK_AND_ASSIGN(alpha.index, MakeScalar(int8(), 0));
+  alpha.dictionary = dict;
+
+  DictionaryScalar::ValueType gamma;
+  ASSERT_OK_AND_ASSIGN(gamma.index, MakeScalar(int8(), 2));
+  gamma.dictionary = dict;
+
+  auto scalar_null = MakeNullScalar(ty);
+  auto scalar_alpha = DictionaryScalar(alpha, ty);
+  auto scalar_gamma = DictionaryScalar(gamma, ty);
+
+  ASSERT_OK_AND_ASSIGN(
+      auto encoded_null,
+      checked_cast<const DictionaryScalar&>(*scalar_null).GetEncodedValue());
+  ASSERT_TRUE(encoded_null->Equals(MakeNullScalar(utf8())));
+
+  ASSERT_OK_AND_ASSIGN(
+      auto encoded_alpha,
+      checked_cast<const DictionaryScalar&>(scalar_alpha).GetEncodedValue());
+  ASSERT_TRUE(encoded_alpha->Equals(MakeScalar("alpha")));
+
+  ASSERT_OK_AND_ASSIGN(
+      auto encoded_gamma,
+      checked_cast<const DictionaryScalar&>(scalar_gamma).GetEncodedValue());
+  ASSERT_TRUE(encoded_gamma->Equals(MakeScalar("gamma")));
+
+  // test Array.GetScalar
+  DictionaryArray arr(ty, ArrayFromJSON(int8(), "[2, 0, 1, null]"), dict);
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto last, arr.GetScalar(3));
+
+  ASSERT_TRUE(first->Equals(scalar_gamma));
+  ASSERT_TRUE(second->Equals(scalar_alpha));
+  ASSERT_TRUE(last->Equals(scalar_null));
+}
+
+TEST(TestSparseUnionScalar, Basics) {
+  auto ty = sparse_union({field("string", utf8()), field("number", uint64())});
+
+  auto alpha = MakeScalar("alpha");
+  auto beta = MakeScalar("beta");
+  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
+
+  auto scalar_alpha = SparseUnionScalar(alpha, ty);
+  auto scalar_beta = SparseUnionScalar(beta, ty);
+  auto scalar_two = SparseUnionScalar(two, ty);
+
+  // test Array.GetScalar
+  std::vector<std::shared_ptr<Array>> children{
+      ArrayFromJSON(utf8(), "[\"alpha\", \"\", \"beta\", null, \"gamma\"]"),
+      ArrayFromJSON(uint64(), "[1, 2, 11, 22, null]")};
+
+  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
+  SparseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1]);
+  ASSERT_OK(arr.ValidateFull());
+
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_TRUE(first->Equals(scalar_alpha));
+
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_TRUE(second->Equals(scalar_two));
+
+  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
+  ASSERT_TRUE(third->Equals(scalar_beta));
+
+  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
+  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));

Review comment:
       Hmm... interesting. Shouldn't it be `MakeNullScalar(utf8())`?

##########
File path: cpp/src/arrow/scalar_test.cc
##########
@@ -80,6 +84,15 @@ TYPED_TEST(TestNumericScalar, Basics) {
 
   auto dyn_null_value = MakeNullScalar(expected_type);
   ASSERT_EQ(*null_value, *dyn_null_value);
+
+  // test Array.GetScalar
+  auto arr = ArrayFromJSON(expected_type, "[null, 1, 2]");
+  ASSERT_OK_AND_ASSIGN(auto null, arr->GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto one, arr->GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto two, arr->GetScalar(2));
+  ASSERT_TRUE(null->Equals(*null_value));
+  ASSERT_TRUE(one->Equals(ScalarType(1)));
+  ASSERT_TRUE(two->Equals(ScalarType(2)));

Review comment:
       You never check that `Equals` returns false?

##########
File path: cpp/src/arrow/scalar.cc
##########
@@ -185,6 +192,35 @@ DictionaryScalar::DictionaryScalar(std::shared_ptr<DataType> type)
                             0)
                 .ValueOrDie()} {}
 
+Result<std::shared_ptr<Scalar>> DictionaryScalar::GetEncodedValue() const {
+  const auto& dict_type = checked_cast<DictionaryType&>(*type);
+
+  if (!is_valid) {
+    return MakeNullScalar(dict_type.value_type());
+  }
+
+  int64_t index_value = 0;
+  switch (dict_type.index_type()->id()) {
+    case Type::INT8:
+      index_value = checked_cast<const Int8Scalar&>(*value.index).value;
+      break;
+    case Type::INT16:
+      index_value = checked_cast<const Int16Scalar&>(*value.index).value;
+      break;
+    case Type::INT32:
+      index_value = checked_cast<const Int32Scalar&>(*value.index).value;
+      break;
+    case Type::INT64:
+      index_value = checked_cast<const Int64Scalar&>(*value.index).value;
+      break;
+    default:
+      return Status::Invalid("Not implemented dictionary index type");

Review comment:
       `Status::TypeError`?

##########
File path: cpp/src/arrow/scalar_test.cc
##########
@@ -451,6 +565,127 @@ TEST(TestStructScalar, FieldAccess) {
 
   ASSERT_RAISES(Invalid, abc.field(5).status());
   ASSERT_RAISES(Invalid, abc.field("c").status());
+
+  ASSERT_OK_AND_ASSIGN(auto d, abc.field("d"));
+  ASSERT_TRUE(d->Equals(MakeNullScalar(int64())));
+}
+
+TEST(TestDictionaryScalar, Basics) {
+  auto ty = dictionary(int8(), utf8());
+  auto dict = ArrayFromJSON(utf8(), "[\"alpha\", \"beta\", \"gamma\"]");
+
+  DictionaryScalar::ValueType alpha;
+  ASSERT_OK_AND_ASSIGN(alpha.index, MakeScalar(int8(), 0));
+  alpha.dictionary = dict;
+
+  DictionaryScalar::ValueType gamma;
+  ASSERT_OK_AND_ASSIGN(gamma.index, MakeScalar(int8(), 2));
+  gamma.dictionary = dict;
+
+  auto scalar_null = MakeNullScalar(ty);
+  auto scalar_alpha = DictionaryScalar(alpha, ty);
+  auto scalar_gamma = DictionaryScalar(gamma, ty);
+
+  ASSERT_OK_AND_ASSIGN(
+      auto encoded_null,
+      checked_cast<const DictionaryScalar&>(*scalar_null).GetEncodedValue());
+  ASSERT_TRUE(encoded_null->Equals(MakeNullScalar(utf8())));
+
+  ASSERT_OK_AND_ASSIGN(
+      auto encoded_alpha,
+      checked_cast<const DictionaryScalar&>(scalar_alpha).GetEncodedValue());
+  ASSERT_TRUE(encoded_alpha->Equals(MakeScalar("alpha")));
+
+  ASSERT_OK_AND_ASSIGN(
+      auto encoded_gamma,
+      checked_cast<const DictionaryScalar&>(scalar_gamma).GetEncodedValue());
+  ASSERT_TRUE(encoded_gamma->Equals(MakeScalar("gamma")));
+
+  // test Array.GetScalar
+  DictionaryArray arr(ty, ArrayFromJSON(int8(), "[2, 0, 1, null]"), dict);
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto last, arr.GetScalar(3));
+
+  ASSERT_TRUE(first->Equals(scalar_gamma));
+  ASSERT_TRUE(second->Equals(scalar_alpha));
+  ASSERT_TRUE(last->Equals(scalar_null));
+}
+
+TEST(TestSparseUnionScalar, Basics) {
+  auto ty = sparse_union({field("string", utf8()), field("number", uint64())});
+
+  auto alpha = MakeScalar("alpha");
+  auto beta = MakeScalar("beta");
+  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
+
+  auto scalar_alpha = SparseUnionScalar(alpha, ty);
+  auto scalar_beta = SparseUnionScalar(beta, ty);
+  auto scalar_two = SparseUnionScalar(two, ty);
+
+  // test Array.GetScalar
+  std::vector<std::shared_ptr<Array>> children{
+      ArrayFromJSON(utf8(), "[\"alpha\", \"\", \"beta\", null, \"gamma\"]"),
+      ArrayFromJSON(uint64(), "[1, 2, 11, 22, null]")};
+
+  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
+  SparseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1]);
+  ASSERT_OK(arr.ValidateFull());
+
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_TRUE(first->Equals(scalar_alpha));
+
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_TRUE(second->Equals(scalar_two));
+
+  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
+  ASSERT_TRUE(third->Equals(scalar_beta));
+
+  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
+  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));
+
+  ASSERT_OK_AND_ASSIGN(auto fifth, arr.GetScalar(4));
+  ASSERT_TRUE(fifth->Equals(MakeNullScalar(ty)));

Review comment:
       Similarly, shouldn't it be `MakeNullScalar(uint64())`?

##########
File path: cpp/src/arrow/scalar_test.cc
##########
@@ -451,6 +565,127 @@ TEST(TestStructScalar, FieldAccess) {
 
   ASSERT_RAISES(Invalid, abc.field(5).status());
   ASSERT_RAISES(Invalid, abc.field("c").status());
+
+  ASSERT_OK_AND_ASSIGN(auto d, abc.field("d"));
+  ASSERT_TRUE(d->Equals(MakeNullScalar(int64())));
+}
+
+TEST(TestDictionaryScalar, Basics) {
+  auto ty = dictionary(int8(), utf8());
+  auto dict = ArrayFromJSON(utf8(), "[\"alpha\", \"beta\", \"gamma\"]");
+
+  DictionaryScalar::ValueType alpha;
+  ASSERT_OK_AND_ASSIGN(alpha.index, MakeScalar(int8(), 0));
+  alpha.dictionary = dict;
+
+  DictionaryScalar::ValueType gamma;
+  ASSERT_OK_AND_ASSIGN(gamma.index, MakeScalar(int8(), 2));
+  gamma.dictionary = dict;
+
+  auto scalar_null = MakeNullScalar(ty);
+  auto scalar_alpha = DictionaryScalar(alpha, ty);
+  auto scalar_gamma = DictionaryScalar(gamma, ty);
+
+  ASSERT_OK_AND_ASSIGN(
+      auto encoded_null,
+      checked_cast<const DictionaryScalar&>(*scalar_null).GetEncodedValue());
+  ASSERT_TRUE(encoded_null->Equals(MakeNullScalar(utf8())));
+
+  ASSERT_OK_AND_ASSIGN(
+      auto encoded_alpha,
+      checked_cast<const DictionaryScalar&>(scalar_alpha).GetEncodedValue());
+  ASSERT_TRUE(encoded_alpha->Equals(MakeScalar("alpha")));
+
+  ASSERT_OK_AND_ASSIGN(
+      auto encoded_gamma,
+      checked_cast<const DictionaryScalar&>(scalar_gamma).GetEncodedValue());
+  ASSERT_TRUE(encoded_gamma->Equals(MakeScalar("gamma")));
+
+  // test Array.GetScalar
+  DictionaryArray arr(ty, ArrayFromJSON(int8(), "[2, 0, 1, null]"), dict);
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_OK_AND_ASSIGN(auto last, arr.GetScalar(3));
+
+  ASSERT_TRUE(first->Equals(scalar_gamma));
+  ASSERT_TRUE(second->Equals(scalar_alpha));
+  ASSERT_TRUE(last->Equals(scalar_null));
+}
+
+TEST(TestSparseUnionScalar, Basics) {
+  auto ty = sparse_union({field("string", utf8()), field("number", uint64())});
+
+  auto alpha = MakeScalar("alpha");
+  auto beta = MakeScalar("beta");
+  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
+
+  auto scalar_alpha = SparseUnionScalar(alpha, ty);
+  auto scalar_beta = SparseUnionScalar(beta, ty);
+  auto scalar_two = SparseUnionScalar(two, ty);
+
+  // test Array.GetScalar
+  std::vector<std::shared_ptr<Array>> children{
+      ArrayFromJSON(utf8(), "[\"alpha\", \"\", \"beta\", null, \"gamma\"]"),
+      ArrayFromJSON(uint64(), "[1, 2, 11, 22, null]")};
+
+  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
+  SparseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1]);
+  ASSERT_OK(arr.ValidateFull());
+
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_TRUE(first->Equals(scalar_alpha));
+
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_TRUE(second->Equals(scalar_two));
+
+  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
+  ASSERT_TRUE(third->Equals(scalar_beta));
+
+  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
+  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));
+
+  ASSERT_OK_AND_ASSIGN(auto fifth, arr.GetScalar(4));
+  ASSERT_TRUE(fifth->Equals(MakeNullScalar(ty)));
+}
+
+TEST(TestDenseUnionScalar, Basics) {
+  auto ty = dense_union({field("string", utf8()), field("number", uint64())});
+
+  auto alpha = MakeScalar("alpha");
+  auto beta = MakeScalar("beta");
+  ASSERT_OK_AND_ASSIGN(auto two, MakeScalar(uint64(), 2));
+  ASSERT_OK_AND_ASSIGN(auto three, MakeScalar(uint64(), 3));
+
+  auto scalar_alpha = DenseUnionScalar(alpha, ty);
+  auto scalar_beta = DenseUnionScalar(beta, ty);
+  auto scalar_two = DenseUnionScalar(two, ty);
+  auto scalar_three = DenseUnionScalar(three, ty);
+
+  // test Array.GetScalar
+  std::vector<std::shared_ptr<Array>> children = {
+      ArrayFromJSON(utf8(), "[\"alpha\", \"beta\", null]"),
+      ArrayFromJSON(uint64(), "[2, 3]")};
+
+  auto type_ids = ArrayFromJSON(int8(), "[0, 1, 0, 0, 1]");
+  auto offsets = ArrayFromJSON(int32(), "[0, 0, 1, 2, 1]");
+  DenseUnionArray arr(ty, 5, children, type_ids->data()->buffers[1],
+                      offsets->data()->buffers[1]);
+  ASSERT_OK(arr.ValidateFull());
+
+  ASSERT_OK_AND_ASSIGN(auto first, arr.GetScalar(0));
+  ASSERT_TRUE(first->Equals(scalar_alpha));
+
+  ASSERT_OK_AND_ASSIGN(auto second, arr.GetScalar(1));
+  ASSERT_TRUE(second->Equals(scalar_two));
+
+  ASSERT_OK_AND_ASSIGN(auto third, arr.GetScalar(2));
+  ASSERT_TRUE(third->Equals(scalar_beta));
+
+  ASSERT_OK_AND_ASSIGN(auto fourth, arr.GetScalar(3));
+  ASSERT_TRUE(fourth->Equals(MakeNullScalar(ty)));

Review comment:
       `MakeNullScalar(utf8)`?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org