You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/10/13 10:05:15 UTC

[GitHub] [arrow] jorisvandenbossche commented on a diff in pull request #14395: ARROW-17960: [C++][Python] Implement list_slice kernel

jorisvandenbossche commented on code in PR #14395:
URL: https://github.com/apache/arrow/pull/14395#discussion_r994425662


##########
cpp/src/arrow/compute/kernels/scalar_nested_test.cc:
##########
@@ -116,6 +117,64 @@ TEST(TestScalarNested, ListElementInvalid) {
               Raises(StatusCode::Invalid));
 }
 
+void CheckListSlice(std::shared_ptr<Array> input, std::shared_ptr<Array> expected,
+                    SliceOptions& args) {
+  ASSERT_OK_AND_ASSIGN(auto result, CallFunction("list_slice", {input}, &args));
+  ASSERT_EQ(result, expected);
+}
+
+TEST(TestScalarNested, ListSlice) {
+  const auto value_types = {float32(), int32()};
+  for (auto value_type : value_types) {
+    auto inputs = {ArrayFromJSON(list(value_type), "[[1, 2, 3], [4, 5], [6]]"),
+                   ArrayFromJSON(fixed_size_list(value_type, 3),
+                                 "[[1, 2, 3], [4, 5, null], [6, null, null]]")};
+    for (auto input : inputs) {
+      SliceOptions args(0, 2);
+      auto expected =
+          ArrayFromJSON(fixed_size_list(value_type, 2), "[[1, 2], [4, 5], [6, null]]");

Review Comment:
   I _think_ I would expect an output of `[[1, 2], [4, 5], [6]]` for the variable sized list case. (like slicing a python list until after the end, eg `[1, 2][:4] == [1, 2]`.
   
   Since slicing a variable sized list still results in a variable sized list type, we can make use of that and don't require to fill with nulls when the list is shorter than the slice we asked for?



##########
cpp/src/arrow/compute/kernels/scalar_nested_test.cc:
##########
@@ -116,6 +117,64 @@ TEST(TestScalarNested, ListElementInvalid) {
               Raises(StatusCode::Invalid));
 }
 
+void CheckListSlice(std::shared_ptr<Array> input, std::shared_ptr<Array> expected,
+                    SliceOptions& args) {
+  ASSERT_OK_AND_ASSIGN(auto result, CallFunction("list_slice", {input}, &args));
+  ASSERT_EQ(result, expected);
+}
+
+TEST(TestScalarNested, ListSlice) {
+  const auto value_types = {float32(), int32()};
+  for (auto value_type : value_types) {
+    auto inputs = {ArrayFromJSON(list(value_type), "[[1, 2, 3], [4, 5], [6]]"),
+                   ArrayFromJSON(fixed_size_list(value_type, 3),
+                                 "[[1, 2, 3], [4, 5, null], [6, null, null]]")};

Review Comment:
   Can you also add a top-level null? (assuming the ArrayFromJSON can handle that)



##########
cpp/src/arrow/compute/kernels/scalar_nested.cc:
##########
@@ -87,6 +87,119 @@ Status GetListElementIndex(const ExecValue& value, T* out) {
   return Status::OK();
 }
 
+template <typename Type, typename IndexType>
+struct ListSlice {
+  using offset_type = typename Type::offset_type;
+
+  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
+    const auto start = OptionsWrapper<SliceOptions>::Get(ctx).start;
+    const auto stop = OptionsWrapper<SliceOptions>::Get(ctx).stop;
+    if (start < 0 || start >= stop) {
+      return Status::Invalid("`start`(", start,
+                             ") should be greater than 0 and greater than `stop`(", stop,

Review Comment:
   ```suggestion
                                ") should be greater than 0 and smaller than `stop`(", stop,
   ```



##########
cpp/src/arrow/compute/kernels/scalar_nested_test.cc:
##########
@@ -116,6 +117,64 @@ TEST(TestScalarNested, ListElementInvalid) {
               Raises(StatusCode::Invalid));
 }
 
+void CheckListSlice(std::shared_ptr<Array> input, std::shared_ptr<Array> expected,
+                    SliceOptions& args) {
+  ASSERT_OK_AND_ASSIGN(auto result, CallFunction("list_slice", {input}, &args));
+  ASSERT_EQ(result, expected);
+}
+
+TEST(TestScalarNested, ListSlice) {
+  const auto value_types = {float32(), int32()};
+  for (auto value_type : value_types) {
+    auto inputs = {ArrayFromJSON(list(value_type), "[[1, 2, 3], [4, 5], [6]]"),
+                   ArrayFromJSON(fixed_size_list(value_type, 3),
+                                 "[[1, 2, 3], [4, 5, null], [6, null, null]]")};
+    for (auto input : inputs) {
+      SliceOptions args(0, 2);
+      auto expected =
+          ArrayFromJSON(fixed_size_list(value_type, 2), "[[1, 2], [4, 5], [6, null]]");
+      CheckListSlice(input, expected, args);
+
+      args.start = 1;
+      expected = ArrayFromJSON(fixed_size_list(value_type, 1), "[[2], [5], [null]]");
+      CheckListSlice(input, expected, args);
+
+      args.start = 2;
+      args.stop = 4;

Review Comment:
   Another corner case to test: start and stop being equal (so getting empty lists)
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org