You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "paleolimbot (via GitHub)" <gi...@apache.org> on 2024/03/18 19:44:12 UTC

[PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

paleolimbot opened a new pull request, #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405

   (no comment)


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "pitrou (via GitHub)" <gi...@apache.org>.
pitrou commented on PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#issuecomment-2009647085

   Question: does Nanoarrow IPC support compressed buffers?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "pitrou (via GitHub)" <gi...@apache.org>.
pitrou commented on code in PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#discussion_r1532132614


##########
dev/benchmarks/c/ipc_benchmark.cc:
##########
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <stdio.h>
+
+#include <benchmark/benchmark.h>
+
+#include <nanoarrow/nanoarrow.hpp>
+#include <nanoarrow/nanoarrow_ipc.hpp>
+
+static ArrowErrorCode MakeFixtureInputStreamFile(const std::string& fixture_name,
+                                                 ArrowIpcInputStream* out) {
+  const char* fixture_dir = std::getenv("NANOARROW_BENCHMARK_FIXTURE_DIR");
+  if (fixture_dir == NULL) {
+    fixture_dir = "fixtures";
+  }
+
+  std::string fixture_path = std::string(fixture_dir) + std::string("/") + fixture_name;
+  FILE* fixture_file = fopen(fixture_path.c_str(), "rb");
+
+  NANOARROW_RETURN_NOT_OK(ArrowIpcInputStreamInitFile(out, fixture_file, true));
+  return NANOARROW_OK;
+}
+
+static ArrowErrorCode MakeFixtureBuffer(const std::string& fixture_name,
+                                        ArrowBuffer* out) {
+  nanoarrow::ipc::UniqueInputStream input_stream;
+  NANOARROW_RETURN_NOT_OK(MakeFixtureInputStreamFile(fixture_name, input_stream.get()));
+
+  nanoarrow::UniqueBuffer buffer;
+  int64_t size_read_out = 0;
+  int64_t chunk_size = 1024;
+  do {
+    NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer.get(), chunk_size));
+    NANOARROW_RETURN_NOT_OK(input_stream->read(input_stream.get(),
+                                               buffer->data + buffer->size_bytes,
+                                               chunk_size, &size_read_out, nullptr));
+    buffer->size_bytes += size_read_out;
+  } while (size_read_out > 0);
+
+  ArrowBufferMove(buffer.get(), out);
+  return NANOARROW_OK;
+}
+
+static ArrowErrorCode ArrayStreamReadAll(ArrowArrayStream* array_stream,
+                                         int64_t* batch_count, int64_t* column_count) {
+  nanoarrow::UniqueSchema schema;
+  NANOARROW_RETURN_NOT_OK(array_stream->get_schema(array_stream, schema.get()));
+  *column_count = schema->n_children;
+  benchmark::DoNotOptimize(schema);
+
+  nanoarrow::UniqueArrayView array_view;
+  NANOARROW_RETURN_NOT_OK(
+      ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr));
+
+  while (true) {
+    nanoarrow::UniqueArray array;
+    NANOARROW_RETURN_NOT_OK(array_stream->get_next(array_stream, array.get()));
+    if (array->release == nullptr) {
+      break;
+    }
+
+    NANOARROW_RETURN_NOT_OK(
+        ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr));
+
+    *batch_count = *batch_count + 1;
+  }
+
+  return NANOARROW_OK;
+}
+
+/// \defgroup nanoarrow-benchmark-ipc IPC Reader Benchmarks
+///
+/// Benchmarks for the ArrowArrayStream IPC reader.
+///
+/// @{
+
+/// \brief Use the ArrowArrayStream IPC reader to read 10,000 batches with 5 elements each
+/// from a file
+static void BenchmarkIpcReadManyBatchesFromFile(benchmark::State& state) {
+  int64_t batch_count = 0;
+  int64_t column_count = 0;
+
+  for (auto _ : state) {
+    nanoarrow::ipc::UniqueInputStream input_stream;
+    NANOARROW_THROW_NOT_OK(
+        MakeFixtureInputStreamFile("many_batches.arrows", input_stream.get()));
+
+    nanoarrow::UniqueArrayStream array_stream;
+    NANOARROW_THROW_NOT_OK(
+        ArrowIpcArrayStreamReaderInit(array_stream.get(), input_stream.get(), nullptr));
+
+    NANOARROW_THROW_NOT_OK(
+        ArrayStreamReadAll(array_stream.get(), &batch_count, &column_count));
+
+    benchmark::DoNotOptimize(batch_count);
+  }
+
+  state.SetItemsProcessed(state.items_processed() + batch_count);
+}
+
+/// \brief Use the ArrowArrayStream IPC reader to read 10,000 batches with 5 elements each
+/// from a buffer
+static void BenchmarkIpcReadManyBatchesFromBuffer(benchmark::State& state) {
+  int64_t batch_count = 0;
+  int64_t column_count = 0;
+
+  nanoarrow::UniqueBuffer buffer;
+  NANOARROW_THROW_NOT_OK(MakeFixtureBuffer("many_batches.arrows", buffer.get()));
+
+  for (auto _ : state) {
+    // Note: an attempt to remove this copy does not affect the timing for this particular
+    // benchmark (it is possible to set a deallocator that does nothing and manually
+    // assign the data and size_bytes of the copy).
+    nanoarrow::UniqueBuffer buffer_copy;
+    NANOARROW_THROW_NOT_OK(
+        ArrowBufferAppend(buffer_copy.get(), buffer->data, buffer->size_bytes));
+
+    nanoarrow::ipc::UniqueInputStream input_stream;
+    NANOARROW_THROW_NOT_OK(
+        ArrowIpcInputStreamInitBuffer(input_stream.get(), buffer_copy.get()));
+
+    nanoarrow::UniqueArrayStream array_stream;
+    NANOARROW_THROW_NOT_OK(
+        ArrowIpcArrayStreamReaderInit(array_stream.get(), input_stream.get(), nullptr));
+
+    NANOARROW_THROW_NOT_OK(
+        ArrayStreamReadAll(array_stream.get(), &batch_count, &column_count));
+
+    benchmark::DoNotOptimize(batch_count);
+  }
+
+  state.SetItemsProcessed(state.items_processed() + batch_count);
+}
+
+/// \brief Use the ArrowArrayStream IPC reader to read 10,000 columns with 0 batches from

Review Comment:
   So this stresses reading the schema? Again, 0-length files or streams might exist, but they do not strike me as a valuable benchmark situation.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "pitrou (via GitHub)" <gi...@apache.org>.
pitrou commented on code in PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#discussion_r1532109360


##########
dev/benchmarks/c/ipc_benchmark.cc:
##########
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <stdio.h>
+
+#include <benchmark/benchmark.h>
+
+#include <nanoarrow/nanoarrow.hpp>
+#include <nanoarrow/nanoarrow_ipc.hpp>
+
+static ArrowErrorCode MakeFixtureInputStreamFile(const std::string& fixture_name,
+                                                 ArrowIpcInputStream* out) {
+  const char* fixture_dir = std::getenv("NANOARROW_BENCHMARK_FIXTURE_DIR");
+  if (fixture_dir == NULL) {
+    fixture_dir = "fixtures";
+  }
+
+  std::string fixture_path = std::string(fixture_dir) + std::string("/") + fixture_name;
+  FILE* fixture_file = fopen(fixture_path.c_str(), "rb");
+
+  NANOARROW_RETURN_NOT_OK(ArrowIpcInputStreamInitFile(out, fixture_file, true));

Review Comment:
   Please add option name
   ```suggestion
     NANOARROW_RETURN_NOT_OK(ArrowIpcInputStreamInitFile(out, fixture_file, /*xxx=*/ true));
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "paleolimbot (via GitHub)" <gi...@apache.org>.
paleolimbot merged PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "paleolimbot (via GitHub)" <gi...@apache.org>.
paleolimbot commented on code in PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#discussion_r1537735183


##########
CMakeLists.txt:
##########
@@ -18,6 +18,11 @@
 message(STATUS "Building using CMake version: ${CMAKE_VERSION}")
 cmake_minimum_required(VERSION 3.14)
 
+# Try to use C11 (required by IPC extension) but fall back to C99

Review Comment:
   I removed this...it's mostly just confusing and apparently wasn't necessary. I did remember that the IPC extension prefers C11 because it (optionally) uses C11 atomics to implement sharing of buffers. We have a CI check to make sure that the IPC extension compiles with C99.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "paleolimbot (via GitHub)" <gi...@apache.org>.
paleolimbot commented on PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#issuecomment-2018215355

   I'll wait for https://github.com/apache/arrow-nanoarrow/pull/410 to merge for this one (to make sure CI is passing).


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "pitrou (via GitHub)" <gi...@apache.org>.
pitrou commented on code in PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#discussion_r1532129144


##########
dev/benchmarks/c/ipc_benchmark.cc:
##########
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <stdio.h>
+
+#include <benchmark/benchmark.h>
+
+#include <nanoarrow/nanoarrow.hpp>
+#include <nanoarrow/nanoarrow_ipc.hpp>
+
+static ArrowErrorCode MakeFixtureInputStreamFile(const std::string& fixture_name,
+                                                 ArrowIpcInputStream* out) {
+  const char* fixture_dir = std::getenv("NANOARROW_BENCHMARK_FIXTURE_DIR");
+  if (fixture_dir == NULL) {
+    fixture_dir = "fixtures";
+  }
+
+  std::string fixture_path = std::string(fixture_dir) + std::string("/") + fixture_name;
+  FILE* fixture_file = fopen(fixture_path.c_str(), "rb");
+
+  NANOARROW_RETURN_NOT_OK(ArrowIpcInputStreamInitFile(out, fixture_file, true));
+  return NANOARROW_OK;
+}
+
+static ArrowErrorCode MakeFixtureBuffer(const std::string& fixture_name,
+                                        ArrowBuffer* out) {
+  nanoarrow::ipc::UniqueInputStream input_stream;
+  NANOARROW_RETURN_NOT_OK(MakeFixtureInputStreamFile(fixture_name, input_stream.get()));
+
+  nanoarrow::UniqueBuffer buffer;
+  int64_t size_read_out = 0;
+  int64_t chunk_size = 1024;
+  do {
+    NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer.get(), chunk_size));
+    NANOARROW_RETURN_NOT_OK(input_stream->read(input_stream.get(),
+                                               buffer->data + buffer->size_bytes,
+                                               chunk_size, &size_read_out, nullptr));
+    buffer->size_bytes += size_read_out;
+  } while (size_read_out > 0);
+
+  ArrowBufferMove(buffer.get(), out);
+  return NANOARROW_OK;
+}
+
+static ArrowErrorCode ArrayStreamReadAll(ArrowArrayStream* array_stream,
+                                         int64_t* batch_count, int64_t* column_count) {
+  nanoarrow::UniqueSchema schema;
+  NANOARROW_RETURN_NOT_OK(array_stream->get_schema(array_stream, schema.get()));
+  *column_count = schema->n_children;
+  benchmark::DoNotOptimize(schema);
+
+  nanoarrow::UniqueArrayView array_view;
+  NANOARROW_RETURN_NOT_OK(
+      ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr));
+
+  while (true) {
+    nanoarrow::UniqueArray array;
+    NANOARROW_RETURN_NOT_OK(array_stream->get_next(array_stream, array.get()));
+    if (array->release == nullptr) {
+      break;
+    }
+
+    NANOARROW_RETURN_NOT_OK(
+        ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr));
+
+    *batch_count = *batch_count + 1;
+  }
+
+  return NANOARROW_OK;
+}
+
+/// \defgroup nanoarrow-benchmark-ipc IPC Reader Benchmarks
+///
+/// Benchmarks for the ArrowArrayStream IPC reader.
+///
+/// @{
+
+/// \brief Use the ArrowArrayStream IPC reader to read 10,000 batches with 5 elements each

Review Comment:
   So, does it make sense to benchmark tiny batches? This is not a real-world situation at all.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "pitrou (via GitHub)" <gi...@apache.org>.
pitrou commented on code in PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#discussion_r1532133624


##########
dev/benchmarks/generate_fixtures.py:
##########
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one

Review Comment:
   Note that, unless Nanoarrow IPC doesn't support writing, this might be done directly in the benchmarking setup.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "paleolimbot (via GitHub)" <gi...@apache.org>.
paleolimbot commented on code in PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#discussion_r1537735821


##########
dev/benchmarks/c/ipc_benchmark.cc:
##########
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <stdio.h>
+
+#include <benchmark/benchmark.h>
+
+#include <nanoarrow/nanoarrow.hpp>
+#include <nanoarrow/nanoarrow_ipc.hpp>
+
+static ArrowErrorCode MakeFixtureInputStreamFile(const std::string& fixture_name,
+                                                 ArrowIpcInputStream* out) {
+  const char* fixture_dir = std::getenv("NANOARROW_BENCHMARK_FIXTURE_DIR");
+  if (fixture_dir == NULL) {
+    fixture_dir = "fixtures";
+  }
+
+  std::string fixture_path = std::string(fixture_dir) + std::string("/") + fixture_name;
+  FILE* fixture_file = fopen(fixture_path.c_str(), "rb");
+
+  NANOARROW_RETURN_NOT_OK(ArrowIpcInputStreamInitFile(out, fixture_file, true));

Review Comment:
   Done!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "pitrou (via GitHub)" <gi...@apache.org>.
pitrou commented on code in PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#discussion_r1532105726


##########
CMakeLists.txt:
##########
@@ -18,6 +18,11 @@
 message(STATUS "Building using CMake version: ${CMAKE_VERSION}")
 cmake_minimum_required(VERSION 3.14)
 
+# Try to use C11 (required by IPC extension) but fall back to C99

Review Comment:
   I'm curious, why does the IPC extension require C11?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "paleolimbot (via GitHub)" <gi...@apache.org>.
paleolimbot commented on code in PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#discussion_r1532147268


##########
dev/benchmarks/c/ipc_benchmark.cc:
##########
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <stdio.h>
+
+#include <benchmark/benchmark.h>
+
+#include <nanoarrow/nanoarrow.hpp>
+#include <nanoarrow/nanoarrow_ipc.hpp>
+
+static ArrowErrorCode MakeFixtureInputStreamFile(const std::string& fixture_name,
+                                                 ArrowIpcInputStream* out) {
+  const char* fixture_dir = std::getenv("NANOARROW_BENCHMARK_FIXTURE_DIR");
+  if (fixture_dir == NULL) {
+    fixture_dir = "fixtures";
+  }
+
+  std::string fixture_path = std::string(fixture_dir) + std::string("/") + fixture_name;
+  FILE* fixture_file = fopen(fixture_path.c_str(), "rb");
+
+  NANOARROW_RETURN_NOT_OK(ArrowIpcInputStreamInitFile(out, fixture_file, true));
+  return NANOARROW_OK;
+}
+
+static ArrowErrorCode MakeFixtureBuffer(const std::string& fixture_name,
+                                        ArrowBuffer* out) {
+  nanoarrow::ipc::UniqueInputStream input_stream;
+  NANOARROW_RETURN_NOT_OK(MakeFixtureInputStreamFile(fixture_name, input_stream.get()));
+
+  nanoarrow::UniqueBuffer buffer;
+  int64_t size_read_out = 0;
+  int64_t chunk_size = 1024;
+  do {
+    NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer.get(), chunk_size));
+    NANOARROW_RETURN_NOT_OK(input_stream->read(input_stream.get(),
+                                               buffer->data + buffer->size_bytes,
+                                               chunk_size, &size_read_out, nullptr));
+    buffer->size_bytes += size_read_out;
+  } while (size_read_out > 0);
+
+  ArrowBufferMove(buffer.get(), out);
+  return NANOARROW_OK;
+}
+
+static ArrowErrorCode ArrayStreamReadAll(ArrowArrayStream* array_stream,
+                                         int64_t* batch_count, int64_t* column_count) {
+  nanoarrow::UniqueSchema schema;
+  NANOARROW_RETURN_NOT_OK(array_stream->get_schema(array_stream, schema.get()));
+  *column_count = schema->n_children;
+  benchmark::DoNotOptimize(schema);
+
+  nanoarrow::UniqueArrayView array_view;
+  NANOARROW_RETURN_NOT_OK(
+      ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr));
+
+  while (true) {
+    nanoarrow::UniqueArray array;
+    NANOARROW_RETURN_NOT_OK(array_stream->get_next(array_stream, array.get()));
+    if (array->release == nullptr) {
+      break;
+    }
+
+    NANOARROW_RETURN_NOT_OK(
+        ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr));
+
+    *batch_count = *batch_count + 1;
+  }
+
+  return NANOARROW_OK;
+}
+
+/// \defgroup nanoarrow-benchmark-ipc IPC Reader Benchmarks
+///
+/// Benchmarks for the ArrowArrayStream IPC reader.
+///
+/// @{
+
+/// \brief Use the ArrowArrayStream IPC reader to read 10,000 batches with 5 elements each
+/// from a file
+static void BenchmarkIpcReadManyBatchesFromFile(benchmark::State& state) {
+  int64_t batch_count = 0;
+  int64_t column_count = 0;
+
+  for (auto _ : state) {
+    nanoarrow::ipc::UniqueInputStream input_stream;
+    NANOARROW_THROW_NOT_OK(
+        MakeFixtureInputStreamFile("many_batches.arrows", input_stream.get()));
+
+    nanoarrow::UniqueArrayStream array_stream;
+    NANOARROW_THROW_NOT_OK(
+        ArrowIpcArrayStreamReaderInit(array_stream.get(), input_stream.get(), nullptr));
+
+    NANOARROW_THROW_NOT_OK(
+        ArrayStreamReadAll(array_stream.get(), &batch_count, &column_count));
+
+    benchmark::DoNotOptimize(batch_count);
+  }
+
+  state.SetItemsProcessed(state.items_processed() + batch_count);
+}
+
+/// \brief Use the ArrowArrayStream IPC reader to read 10,000 batches with 5 elements each
+/// from a buffer
+static void BenchmarkIpcReadManyBatchesFromBuffer(benchmark::State& state) {
+  int64_t batch_count = 0;
+  int64_t column_count = 0;
+
+  nanoarrow::UniqueBuffer buffer;
+  NANOARROW_THROW_NOT_OK(MakeFixtureBuffer("many_batches.arrows", buffer.get()));
+
+  for (auto _ : state) {
+    // Note: an attempt to remove this copy does not affect the timing for this particular
+    // benchmark (it is possible to set a deallocator that does nothing and manually
+    // assign the data and size_bytes of the copy).
+    nanoarrow::UniqueBuffer buffer_copy;
+    NANOARROW_THROW_NOT_OK(
+        ArrowBufferAppend(buffer_copy.get(), buffer->data, buffer->size_bytes));
+
+    nanoarrow::ipc::UniqueInputStream input_stream;
+    NANOARROW_THROW_NOT_OK(
+        ArrowIpcInputStreamInitBuffer(input_stream.get(), buffer_copy.get()));
+
+    nanoarrow::UniqueArrayStream array_stream;
+    NANOARROW_THROW_NOT_OK(
+        ArrowIpcArrayStreamReaderInit(array_stream.get(), input_stream.get(), nullptr));
+
+    NANOARROW_THROW_NOT_OK(
+        ArrayStreamReadAll(array_stream.get(), &batch_count, &column_count));
+
+    benchmark::DoNotOptimize(batch_count);
+  }
+
+  state.SetItemsProcessed(state.items_processed() + batch_count);
+}
+
+/// \brief Use the ArrowArrayStream IPC reader to read 10,000 columns with 0 batches from

Review Comment:
   This is almost certainly just me being new to benchmarking...I had been trying to capture end-member situations that put pressure on various pieces of the code, but it would probably be better to save those benchmarks for places where we can isolate the pieces better (e.g., test the `ArrowIpcDecoder` functions directly and not the stream implementation)?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "pitrou (via GitHub)" <gi...@apache.org>.
pitrou commented on code in PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#discussion_r1532120498


##########
dev/benchmarks/c/ipc_benchmark.cc:
##########
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <stdio.h>
+
+#include <benchmark/benchmark.h>
+
+#include <nanoarrow/nanoarrow.hpp>
+#include <nanoarrow/nanoarrow_ipc.hpp>
+
+static ArrowErrorCode MakeFixtureInputStreamFile(const std::string& fixture_name,
+                                                 ArrowIpcInputStream* out) {
+  const char* fixture_dir = std::getenv("NANOARROW_BENCHMARK_FIXTURE_DIR");
+  if (fixture_dir == NULL) {
+    fixture_dir = "fixtures";
+  }
+
+  std::string fixture_path = std::string(fixture_dir) + std::string("/") + fixture_name;
+  FILE* fixture_file = fopen(fixture_path.c_str(), "rb");
+
+  NANOARROW_RETURN_NOT_OK(ArrowIpcInputStreamInitFile(out, fixture_file, true));
+  return NANOARROW_OK;
+}
+
+static ArrowErrorCode MakeFixtureBuffer(const std::string& fixture_name,
+                                        ArrowBuffer* out) {
+  nanoarrow::ipc::UniqueInputStream input_stream;
+  NANOARROW_RETURN_NOT_OK(MakeFixtureInputStreamFile(fixture_name, input_stream.get()));
+
+  nanoarrow::UniqueBuffer buffer;
+  int64_t size_read_out = 0;
+  int64_t chunk_size = 1024;
+  do {
+    NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer.get(), chunk_size));
+    NANOARROW_RETURN_NOT_OK(input_stream->read(input_stream.get(),
+                                               buffer->data + buffer->size_bytes,
+                                               chunk_size, &size_read_out, nullptr));
+    buffer->size_bytes += size_read_out;
+  } while (size_read_out > 0);
+
+  ArrowBufferMove(buffer.get(), out);
+  return NANOARROW_OK;
+}
+
+static ArrowErrorCode ArrayStreamReadAll(ArrowArrayStream* array_stream,
+                                         int64_t* batch_count, int64_t* column_count) {
+  nanoarrow::UniqueSchema schema;
+  NANOARROW_RETURN_NOT_OK(array_stream->get_schema(array_stream, schema.get()));
+  *column_count = schema->n_children;
+  benchmark::DoNotOptimize(schema);
+
+  nanoarrow::UniqueArrayView array_view;
+  NANOARROW_RETURN_NOT_OK(
+      ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr));
+
+  while (true) {
+    nanoarrow::UniqueArray array;
+    NANOARROW_RETURN_NOT_OK(array_stream->get_next(array_stream, array.get()));
+    if (array->release == nullptr) {
+      break;
+    }
+
+    NANOARROW_RETURN_NOT_OK(
+        ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr));
+
+    *batch_count = *batch_count + 1;
+  }
+
+  return NANOARROW_OK;
+}
+
+/// \defgroup nanoarrow-benchmark-ipc IPC Reader Benchmarks
+///
+/// Benchmarks for the ArrowArrayStream IPC reader.
+///
+/// @{
+
+/// \brief Use the ArrowArrayStream IPC reader to read 10,000 batches with 5 elements each
+/// from a file
+static void BenchmarkIpcReadManyBatchesFromFile(benchmark::State& state) {
+  int64_t batch_count = 0;
+  int64_t column_count = 0;
+
+  for (auto _ : state) {
+    nanoarrow::ipc::UniqueInputStream input_stream;
+    NANOARROW_THROW_NOT_OK(
+        MakeFixtureInputStreamFile("many_batches.arrows", input_stream.get()));
+
+    nanoarrow::UniqueArrayStream array_stream;
+    NANOARROW_THROW_NOT_OK(
+        ArrowIpcArrayStreamReaderInit(array_stream.get(), input_stream.get(), nullptr));
+
+    NANOARROW_THROW_NOT_OK(
+        ArrayStreamReadAll(array_stream.get(), &batch_count, &column_count));
+
+    benchmark::DoNotOptimize(batch_count);
+  }
+
+  state.SetItemsProcessed(state.items_processed() + batch_count);

Review Comment:
   Why `+ batch_count`? Also, are you sure `state.items_processed()` already carries a meaningful value?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "pitrou (via GitHub)" <gi...@apache.org>.
pitrou commented on code in PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#discussion_r1532148683


##########
dev/benchmarks/c/ipc_benchmark.cc:
##########
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <stdio.h>
+
+#include <benchmark/benchmark.h>
+
+#include <nanoarrow/nanoarrow.hpp>
+#include <nanoarrow/nanoarrow_ipc.hpp>
+
+static ArrowErrorCode MakeFixtureInputStreamFile(const std::string& fixture_name,
+                                                 ArrowIpcInputStream* out) {
+  const char* fixture_dir = std::getenv("NANOARROW_BENCHMARK_FIXTURE_DIR");
+  if (fixture_dir == NULL) {
+    fixture_dir = "fixtures";
+  }
+
+  std::string fixture_path = std::string(fixture_dir) + std::string("/") + fixture_name;
+  FILE* fixture_file = fopen(fixture_path.c_str(), "rb");
+
+  NANOARROW_RETURN_NOT_OK(ArrowIpcInputStreamInitFile(out, fixture_file, true));
+  return NANOARROW_OK;
+}
+
+static ArrowErrorCode MakeFixtureBuffer(const std::string& fixture_name,
+                                        ArrowBuffer* out) {
+  nanoarrow::ipc::UniqueInputStream input_stream;
+  NANOARROW_RETURN_NOT_OK(MakeFixtureInputStreamFile(fixture_name, input_stream.get()));
+
+  nanoarrow::UniqueBuffer buffer;
+  int64_t size_read_out = 0;
+  int64_t chunk_size = 1024;
+  do {
+    NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer.get(), chunk_size));
+    NANOARROW_RETURN_NOT_OK(input_stream->read(input_stream.get(),
+                                               buffer->data + buffer->size_bytes,
+                                               chunk_size, &size_read_out, nullptr));
+    buffer->size_bytes += size_read_out;
+  } while (size_read_out > 0);
+
+  ArrowBufferMove(buffer.get(), out);
+  return NANOARROW_OK;
+}
+
+static ArrowErrorCode ArrayStreamReadAll(ArrowArrayStream* array_stream,
+                                         int64_t* batch_count, int64_t* column_count) {
+  nanoarrow::UniqueSchema schema;
+  NANOARROW_RETURN_NOT_OK(array_stream->get_schema(array_stream, schema.get()));
+  *column_count = schema->n_children;
+  benchmark::DoNotOptimize(schema);
+
+  nanoarrow::UniqueArrayView array_view;
+  NANOARROW_RETURN_NOT_OK(
+      ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr));
+
+  while (true) {
+    nanoarrow::UniqueArray array;
+    NANOARROW_RETURN_NOT_OK(array_stream->get_next(array_stream, array.get()));
+    if (array->release == nullptr) {
+      break;
+    }
+
+    NANOARROW_RETURN_NOT_OK(
+        ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr));
+
+    *batch_count = *batch_count + 1;
+  }
+
+  return NANOARROW_OK;
+}
+
+/// \defgroup nanoarrow-benchmark-ipc IPC Reader Benchmarks
+///
+/// Benchmarks for the ArrowArrayStream IPC reader.
+///
+/// @{
+
+/// \brief Use the ArrowArrayStream IPC reader to read 10,000 batches with 5 elements each
+/// from a file
+static void BenchmarkIpcReadManyBatchesFromFile(benchmark::State& state) {
+  int64_t batch_count = 0;
+  int64_t column_count = 0;
+
+  for (auto _ : state) {
+    nanoarrow::ipc::UniqueInputStream input_stream;
+    NANOARROW_THROW_NOT_OK(
+        MakeFixtureInputStreamFile("many_batches.arrows", input_stream.get()));
+
+    nanoarrow::UniqueArrayStream array_stream;
+    NANOARROW_THROW_NOT_OK(
+        ArrowIpcArrayStreamReaderInit(array_stream.get(), input_stream.get(), nullptr));
+
+    NANOARROW_THROW_NOT_OK(
+        ArrayStreamReadAll(array_stream.get(), &batch_count, &column_count));
+
+    benchmark::DoNotOptimize(batch_count);
+  }
+
+  state.SetItemsProcessed(state.items_processed() + batch_count);
+}
+
+/// \brief Use the ArrowArrayStream IPC reader to read 10,000 batches with 5 elements each
+/// from a buffer
+static void BenchmarkIpcReadManyBatchesFromBuffer(benchmark::State& state) {
+  int64_t batch_count = 0;
+  int64_t column_count = 0;
+
+  nanoarrow::UniqueBuffer buffer;
+  NANOARROW_THROW_NOT_OK(MakeFixtureBuffer("many_batches.arrows", buffer.get()));
+
+  for (auto _ : state) {
+    // Note: an attempt to remove this copy does not affect the timing for this particular
+    // benchmark (it is possible to set a deallocator that does nothing and manually
+    // assign the data and size_bytes of the copy).
+    nanoarrow::UniqueBuffer buffer_copy;
+    NANOARROW_THROW_NOT_OK(
+        ArrowBufferAppend(buffer_copy.get(), buffer->data, buffer->size_bytes));
+
+    nanoarrow::ipc::UniqueInputStream input_stream;
+    NANOARROW_THROW_NOT_OK(
+        ArrowIpcInputStreamInitBuffer(input_stream.get(), buffer_copy.get()));
+
+    nanoarrow::UniqueArrayStream array_stream;
+    NANOARROW_THROW_NOT_OK(
+        ArrowIpcArrayStreamReaderInit(array_stream.get(), input_stream.get(), nullptr));
+
+    NANOARROW_THROW_NOT_OK(
+        ArrayStreamReadAll(array_stream.get(), &batch_count, &column_count));
+
+    benchmark::DoNotOptimize(batch_count);
+  }
+
+  state.SetItemsProcessed(state.items_processed() + batch_count);
+}
+
+/// \brief Use the ArrowArrayStream IPC reader to read 10,000 columns with 0 batches from

Review Comment:
   Yes, you could also micro-benchmark the IPC decoder. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "paleolimbot (via GitHub)" <gi...@apache.org>.
paleolimbot commented on code in PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#discussion_r1532140128


##########
dev/benchmarks/generate_fixtures.py:
##########
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one

Review Comment:
   We don't support writing (yet)!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "paleolimbot (via GitHub)" <gi...@apache.org>.
paleolimbot commented on code in PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#discussion_r1537737264


##########
dev/benchmarks/c/ipc_benchmark.cc:
##########
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <stdio.h>
+
+#include <benchmark/benchmark.h>
+
+#include <nanoarrow/nanoarrow.hpp>
+#include <nanoarrow/nanoarrow_ipc.hpp>
+
+static ArrowErrorCode MakeFixtureInputStreamFile(const std::string& fixture_name,
+                                                 ArrowIpcInputStream* out) {
+  const char* fixture_dir = std::getenv("NANOARROW_BENCHMARK_FIXTURE_DIR");
+  if (fixture_dir == NULL) {
+    fixture_dir = "fixtures";
+  }
+
+  std::string fixture_path = std::string(fixture_dir) + std::string("/") + fixture_name;
+  FILE* fixture_file = fopen(fixture_path.c_str(), "rb");
+
+  NANOARROW_RETURN_NOT_OK(ArrowIpcInputStreamInitFile(out, fixture_file, true));
+  return NANOARROW_OK;
+}
+
+static ArrowErrorCode MakeFixtureBuffer(const std::string& fixture_name,
+                                        ArrowBuffer* out) {
+  nanoarrow::ipc::UniqueInputStream input_stream;
+  NANOARROW_RETURN_NOT_OK(MakeFixtureInputStreamFile(fixture_name, input_stream.get()));
+
+  nanoarrow::UniqueBuffer buffer;
+  int64_t size_read_out = 0;
+  int64_t chunk_size = 1024;
+  do {
+    NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer.get(), chunk_size));
+    NANOARROW_RETURN_NOT_OK(input_stream->read(input_stream.get(),
+                                               buffer->data + buffer->size_bytes,
+                                               chunk_size, &size_read_out, nullptr));
+    buffer->size_bytes += size_read_out;
+  } while (size_read_out > 0);
+
+  ArrowBufferMove(buffer.get(), out);
+  return NANOARROW_OK;
+}
+
+static ArrowErrorCode ArrayStreamReadAll(ArrowArrayStream* array_stream,
+                                         int64_t* batch_count, int64_t* column_count) {
+  nanoarrow::UniqueSchema schema;
+  NANOARROW_RETURN_NOT_OK(array_stream->get_schema(array_stream, schema.get()));
+  *column_count = schema->n_children;
+  benchmark::DoNotOptimize(schema);
+
+  nanoarrow::UniqueArrayView array_view;
+  NANOARROW_RETURN_NOT_OK(
+      ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr));
+
+  while (true) {
+    nanoarrow::UniqueArray array;
+    NANOARROW_RETURN_NOT_OK(array_stream->get_next(array_stream, array.get()));
+    if (array->release == nullptr) {
+      break;
+    }
+
+    NANOARROW_RETURN_NOT_OK(
+        ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr));
+
+    *batch_count = *batch_count + 1;
+  }
+
+  return NANOARROW_OK;
+}
+
+/// \defgroup nanoarrow-benchmark-ipc IPC Reader Benchmarks
+///
+/// Benchmarks for the ArrowArrayStream IPC reader.
+///
+/// @{
+
+/// \brief Use the ArrowArrayStream IPC reader to read 10,000 batches with 5 elements each
+/// from a file
+static void BenchmarkIpcReadManyBatchesFromFile(benchmark::State& state) {
+  int64_t batch_count = 0;
+  int64_t column_count = 0;
+
+  for (auto _ : state) {
+    nanoarrow::ipc::UniqueInputStream input_stream;
+    NANOARROW_THROW_NOT_OK(
+        MakeFixtureInputStreamFile("many_batches.arrows", input_stream.get()));
+
+    nanoarrow::UniqueArrayStream array_stream;
+    NANOARROW_THROW_NOT_OK(
+        ArrowIpcArrayStreamReaderInit(array_stream.get(), input_stream.get(), nullptr));
+
+    NANOARROW_THROW_NOT_OK(
+        ArrayStreamReadAll(array_stream.get(), &batch_count, &column_count));
+
+    benchmark::DoNotOptimize(batch_count);
+  }
+
+  state.SetItemsProcessed(state.items_processed() + batch_count);

Review Comment:
   I removed this in favour of a bytes-per-second throughput metric (since the number of batches is no longer what we're testing).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "codecov-commenter (via GitHub)" <gi...@apache.org>.
codecov-commenter commented on PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#issuecomment-2008364658

   ## [Codecov](https://app.codecov.io/gh/apache/arrow-nanoarrow/pull/405?dropdown=coverage&src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache) Report
   All modified and coverable lines are covered by tests :white_check_mark:
   > Project coverage is 88.74%. Comparing base [(`dc50114`)](https://app.codecov.io/gh/apache/arrow-nanoarrow/commit/dc50114756b7e9067b42181a6a86f928effc6e68?dropdown=coverage&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache) to head [(`9a61672`)](https://app.codecov.io/gh/apache/arrow-nanoarrow/pull/405?dropdown=coverage&src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache).
   
   
   <details><summary>Additional details and impacted files</summary>
   
   
   ```diff
   @@           Coverage Diff           @@
   ##             main     #405   +/-   ##
   =======================================
     Coverage   88.74%   88.74%           
   =======================================
     Files          81       81           
     Lines       14398    14398           
   =======================================
     Hits        12778    12778           
     Misses       1620     1620           
   ```
   
   
   
   </details>
   
   [:umbrella: View full report in Codecov by Sentry](https://app.codecov.io/gh/apache/arrow-nanoarrow/pull/405?dropdown=coverage&src=pr&el=continue&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache).   
   :loudspeaker: Have feedback on the report? [Share it here](https://about.codecov.io/codecov-pr-comment-feedback/?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache).
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "paleolimbot (via GitHub)" <gi...@apache.org>.
paleolimbot commented on code in PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#discussion_r1532136281


##########
CMakeLists.txt:
##########
@@ -18,6 +18,11 @@
 message(STATUS "Building using CMake version: ${CMAKE_VERSION}")
 cmake_minimum_required(VERSION 3.14)
 
+# Try to use C11 (required by IPC extension) but fall back to C99

Review Comment:
   I put it in because the flatbuffer implementation ( https://github.com/dvidelabs/flatcc ) uses some C11 features, although now that I read the documentation, it seems like it just requires some extra includes to work with C99.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "paleolimbot (via GitHub)" <gi...@apache.org>.
paleolimbot commented on PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#issuecomment-2009653032

   > Question: does Nanoarrow IPC support compressed buffers?
   
   It does not currently support compressed buffers (nor writing)


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Re: [PR] chore(dev/benchmarks): Benchmark IPC reader [arrow-nanoarrow]

Posted by "paleolimbot (via GitHub)" <gi...@apache.org>.
paleolimbot commented on code in PR #405:
URL: https://github.com/apache/arrow-nanoarrow/pull/405#discussion_r1537742305


##########
dev/benchmarks/c/ipc_benchmark.cc:
##########
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <stdio.h>
+
+#include <benchmark/benchmark.h>
+
+#include <nanoarrow/nanoarrow.hpp>
+#include <nanoarrow/nanoarrow_ipc.hpp>
+
+static ArrowErrorCode MakeFixtureInputStreamFile(const std::string& fixture_name,
+                                                 ArrowIpcInputStream* out) {
+  const char* fixture_dir = std::getenv("NANOARROW_BENCHMARK_FIXTURE_DIR");
+  if (fixture_dir == NULL) {
+    fixture_dir = "fixtures";
+  }
+
+  std::string fixture_path = std::string(fixture_dir) + std::string("/") + fixture_name;
+  FILE* fixture_file = fopen(fixture_path.c_str(), "rb");
+
+  NANOARROW_RETURN_NOT_OK(ArrowIpcInputStreamInitFile(out, fixture_file, true));
+  return NANOARROW_OK;
+}
+
+static ArrowErrorCode MakeFixtureBuffer(const std::string& fixture_name,
+                                        ArrowBuffer* out) {
+  nanoarrow::ipc::UniqueInputStream input_stream;
+  NANOARROW_RETURN_NOT_OK(MakeFixtureInputStreamFile(fixture_name, input_stream.get()));
+
+  nanoarrow::UniqueBuffer buffer;
+  int64_t size_read_out = 0;
+  int64_t chunk_size = 1024;
+  do {
+    NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer.get(), chunk_size));
+    NANOARROW_RETURN_NOT_OK(input_stream->read(input_stream.get(),
+                                               buffer->data + buffer->size_bytes,
+                                               chunk_size, &size_read_out, nullptr));
+    buffer->size_bytes += size_read_out;
+  } while (size_read_out > 0);
+
+  ArrowBufferMove(buffer.get(), out);
+  return NANOARROW_OK;
+}
+
+static ArrowErrorCode ArrayStreamReadAll(ArrowArrayStream* array_stream,
+                                         int64_t* batch_count, int64_t* column_count) {
+  nanoarrow::UniqueSchema schema;
+  NANOARROW_RETURN_NOT_OK(array_stream->get_schema(array_stream, schema.get()));
+  *column_count = schema->n_children;
+  benchmark::DoNotOptimize(schema);
+
+  nanoarrow::UniqueArrayView array_view;
+  NANOARROW_RETURN_NOT_OK(
+      ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr));
+
+  while (true) {
+    nanoarrow::UniqueArray array;
+    NANOARROW_RETURN_NOT_OK(array_stream->get_next(array_stream, array.get()));
+    if (array->release == nullptr) {
+      break;
+    }
+
+    NANOARROW_RETURN_NOT_OK(
+        ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr));
+
+    *batch_count = *batch_count + 1;
+  }
+
+  return NANOARROW_OK;
+}
+
+/// \defgroup nanoarrow-benchmark-ipc IPC Reader Benchmarks
+///
+/// Benchmarks for the ArrowArrayStream IPC reader.
+///
+/// @{
+
+/// \brief Use the ArrowArrayStream IPC reader to read 10,000 batches with 5 elements each

Review Comment:
   This is fixed: the benchmarks now run on more typical ~10 MB files. I didn't spend much effort varying the types since the benchmark doesn't include a decode step (but I made that clear in the benchmark name).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org