You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/11/08 09:54:11 UTC

[GitHub] [arrow] jorisvandenbossche commented on a diff in pull request #14495: ARROW-17989: [C++][Python] Enable struct_field kernel to accept string field names

jorisvandenbossche commented on code in PR #14495:
URL: https://github.com/apache/arrow/pull/14495#discussion_r1016318695


##########
cpp/src/arrow/compute/kernels/scalar_nested_test.cc:
##########
@@ -124,6 +124,13 @@ TEST(TestScalarNested, StructField) {
   StructFieldOptions invalid2({2, 4});
   StructFieldOptions invalid3({3});
   StructFieldOptions invalid4({0, 1});
+
+  // Test using FieldRefs
+  StructFieldOptions extract0_field_ref_path(FieldRef(FieldPath({0})));
+  StructFieldOptions extract0_field_ref_name(FieldRef("a"));
+  ASSERT_OK_AND_ASSIGN(auto dotted_path_ref, FieldRef::FromDotPath(".c.d"));
+  StructFieldOptions extract20_field_ref_nest(dotted_path_ref);

Review Comment:
   ```suggestion
     StructFieldOptions extract20_field_ref_nest(FieldRef::FromDotPath(".c.d"));
   ```



##########
cpp/src/arrow/compute/kernels/scalar_nested_test.cc:
##########
@@ -141,16 +148,25 @@ TEST(TestScalarNested, StructField) {
                 &extract0);
     CheckScalar("struct_field", {arr}, ArrayFromJSON(int64(), "[10, 11, 12, null]"),
                 &extract20);
+
+    CheckScalar("struct_field", {arr}, ArrayFromJSON(int32(), "[1, null, 3, null]"),
+                &extract0_field_ref_path);
+    CheckScalar("struct_field", {arr}, ArrayFromJSON(int32(), "[1, null, 3, null]"),
+                &extract0_field_ref_name);
+    CheckScalar("struct_field", {arr}, ArrayFromJSON(int64(), "[10, 11, 12, null]"),
+                &extract20_field_ref_nest);
+
     EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                     ::testing::HasSubstr("out-of-bounds field reference"),
                                     CallFunction("struct_field", {arr}, &invalid1));
     EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
-                                    ::testing::HasSubstr("out-of-bounds field reference"),
+                                    ::testing::HasSubstr("No match for FieldRef"),

Review Comment:
   The original error seems more informative here. Would there be a way to preserve that? Although I suppose that would need to be done in `FieldRef::FindOne`?



##########
cpp/src/arrow/compute/kernels/scalar_nested_test.cc:
##########
@@ -141,16 +148,25 @@ TEST(TestScalarNested, StructField) {
                 &extract0);
     CheckScalar("struct_field", {arr}, ArrayFromJSON(int64(), "[10, 11, 12, null]"),
                 &extract20);
+
+    CheckScalar("struct_field", {arr}, ArrayFromJSON(int32(), "[1, null, 3, null]"),
+                &extract0_field_ref_path);
+    CheckScalar("struct_field", {arr}, ArrayFromJSON(int32(), "[1, null, 3, null]"),
+                &extract0_field_ref_name);
+    CheckScalar("struct_field", {arr}, ArrayFromJSON(int64(), "[10, 11, 12, null]"),
+                &extract20_field_ref_nest);
+
     EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                     ::testing::HasSubstr("out-of-bounds field reference"),
                                     CallFunction("struct_field", {arr}, &invalid1));
     EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
-                                    ::testing::HasSubstr("out-of-bounds field reference"),
+                                    ::testing::HasSubstr("No match for FieldRef"),
                                     CallFunction("struct_field", {arr}, &invalid2));
     EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
                                     ::testing::HasSubstr("out-of-bounds field reference"),
                                     CallFunction("struct_field", {arr}, &invalid3));
-    EXPECT_RAISES_WITH_MESSAGE_THAT(TypeError, ::testing::HasSubstr("cannot subscript"),
+    EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
+                                    ::testing::HasSubstr("No match for FieldRef"),

Review Comment:
   Similarly here



##########
python/pyarrow/_compute.pyx:
##########
@@ -1322,7 +1322,39 @@ class MakeStructOptions(_MakeStructOptions):
 
 cdef class _StructFieldOptions(FunctionOptions):
     def _set_options(self, indices):
-        self.wrapped.reset(new CStructFieldOptions(indices))
+        cdef:
+            CFieldRef field_ref
+            const CFieldRef* field_ref_ptr
+
+        # List[str]/List[bytes] converted to '.a.dotted.path'
+        if isinstance(indices, list) and len(indices):
+            if isinstance(indices[0], str):
+                indices = '.' + '.'.join(indices)
+            elif isinstance(indices[0], bytes):
+                indices = b'.' + b'.'.join(indices)

Review Comment:
   Doesn't this disallow mixing strings and ints in the same list? Alternatively, I would avoid converting a list to a dotted path, and instead add a generic list-handling case.
   
   More generally, I think we should align this more closely with the existing way of creating a FieldRef in Python/Cython (e.g. the `FieldRef._nested_field` constructor). If we have a list input here, we can just pass it through, and then handle the resulting Expression as you already do here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org