You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/07/08 09:22:32 UTC

[GitHub] [arrow] pitrou commented on a change in pull request #7659: ARROW-9287: [C++] Support unsigned dictionary indices

pitrou commented on a change in pull request #7659:
URL: https://github.com/apache/arrow/pull/7659#discussion_r451396580



##########
File path: cpp/src/arrow/testing/util.h
##########
@@ -181,4 +181,11 @@ UnionTypeFactories() {
 // Windows.
 ARROW_TESTING_EXPORT int GetListenPort();
 
+namespace test {

Review comment:
       IMHO we shouldn't introduce this namespace, or we should put all functions in this file in it.

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -1744,16 +1744,18 @@ typedef ::testing::Types<NullType, UInt8Type, Int8Type, UInt16Type, Int16Type, I
 TYPED_TEST_SUITE(TestDictionaryCast, TestTypes);
 
 TYPED_TEST(TestDictionaryCast, Basic) {
-  CastOptions options;
-  std::shared_ptr<Array> plain_array =
-      TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(10, 2);
-
-  ASSERT_OK_AND_ASSIGN(Datum encoded, DictionaryEncode(plain_array->data()));
-  ASSERT_EQ(encoded.array()->type->id(), Type::DICTIONARY);
-
-  // TODO: Should casting dictionary scalars work?
-  this->CheckPass(*MakeArray(encoded.array()), *plain_array, plain_array->type(), options,
-                  /*check_scalar=*/false);
+  std::shared_ptr<Array> dict =
+      TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(5, 0);
+  for (auto index_ty : test::dictionary_index_types()) {
+    auto indices = ArrayFromJSON(index_ty, "[4, 0, 1, 2, 0, 4, 2, 2]");

Review comment:
       Add a null perhaps?

##########
File path: dev/archery/archery/integration/datagen.py
##########
@@ -1547,6 +1567,12 @@ def _temp_path():
         .skip_category('Go')
         .skip_category('Rust'),
 
+        # TODO: Dictionary unsigned indices

Review comment:
       Why is it marked TODO? Do you intend to add specific per-language comments instead?

##########
File path: cpp/src/arrow/array/array_dict_test.cc
##########
@@ -1116,6 +1137,31 @@ TEST(TestDictionary, TransposeTrivial) {
   }
 }
 
+TEST(TestDictionary, GetValueIndex) {
+  const char* indices_json = "[0, 1, 2, 3, 4, 5]";

Review comment:
       Something less trivial perhaps?

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -1744,16 +1744,18 @@ typedef ::testing::Types<NullType, UInt8Type, Int8Type, UInt16Type, Int16Type, I
 TYPED_TEST_SUITE(TestDictionaryCast, TestTypes);
 
 TYPED_TEST(TestDictionaryCast, Basic) {
-  CastOptions options;
-  std::shared_ptr<Array> plain_array =
-      TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(10, 2);
-
-  ASSERT_OK_AND_ASSIGN(Datum encoded, DictionaryEncode(plain_array->data()));
-  ASSERT_EQ(encoded.array()->type->id(), Type::DICTIONARY);
-
-  // TODO: Should casting dictionary scalars work?
-  this->CheckPass(*MakeArray(encoded.array()), *plain_array, plain_array->type(), options,
-                  /*check_scalar=*/false);
+  std::shared_ptr<Array> dict =
+      TestBase::MakeRandomArray<typename TypeTraits<TypeParam>::ArrayType>(5, 0);

Review comment:
       Wouldn't it be more thorough with a non-zero null count?

##########
File path: cpp/src/arrow/scalar.cc
##########
@@ -199,25 +199,30 @@ Result<std::shared_ptr<Scalar>> DictionaryScalar::GetEncodedValue() const {
     return MakeNullScalar(dict_type.value_type());
   }
 
+  const void* index =
+      checked_cast<const internal::PrimitiveScalarBase&>(*value.index).data();

Review comment:
       Why not cast to the target numeric scalar directly instead of making of virtual function call to `data()`?

##########
File path: cpp/src/arrow/type_test.cc
##########
@@ -1608,7 +1603,7 @@ TEST(TestDictionaryType, UnifyNumeric) {
   ASSERT_TRUE(out_type->Equals(*expected));
   ASSERT_TRUE(out_dict->Equals(*expected_dict));
 
-  std::shared_ptr<Buffer> b1, b2, b3;
+  std::shared_ptr<Buffer> b1, b2, b3, b4;

Review comment:
       This doesn't look necessary?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org