Posted to commits@arrow.apache.org by ap...@apache.org on 2022/04/14 10:43:40 UTC

[arrow] branch master updated: ARROW-11989: [C++][Python] Improve ChunkedArray's complexity for the access of elements

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 4e49aa8eb5 ARROW-11989: [C++][Python] Improve ChunkedArray's complexity for the access of elements
4e49aa8eb5 is described below

commit 4e49aa8eb5f11b6160baf972108f8f57b42eeda8
Author: Eduardo Ponce <ed...@gmail.com>
AuthorDate: Thu Apr 14 12:43:31 2022 +0200

    ARROW-11989: [C++][Python] Improve ChunkedArray's complexity for the access of elements
    
    Improves the time needed to locate a chunk in ChunkedArray by using binary
    search, making random access O(log n). Chunks are resolved when invoking
    `GetScalar()` (C++) and the index operator (Python); a short usage sketch
    follows the message.
    
    Closes #12055 from edponce/ARROW-11989-Improve-ChunkedArrays-complexity-for-the
    
    Lead-authored-by: Eduardo Ponce <ed...@gmail.com>
    Co-authored-by: Antoine Pitrou <an...@python.org>
    Signed-off-by: Antoine Pitrou <an...@python.org>
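
A minimal sketch of the element access this change speeds up (illustrative
only; the chunk layout below is arbitrary, and the semantics are unchanged,
only the lookup cost improves):

    import pyarrow as pa

    # Three chunks; logical indices 0-7 cross chunk boundaries.
    ca = pa.chunked_array([[1, 2, 3], [4, 5], [6, 7, 8]])

    # Element access resolves the owning chunk via binary search over
    # cumulative chunk offsets instead of a linear scan of the chunks.
    print(ca[4])    # -> 5, taken from the second chunk

    # Out-of-bounds access still raises IndexError.
    try:
        ca[8]
    except IndexError as exc:
        print(exc)
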
---
 cpp/src/arrow/CMakeLists.txt                     |   1 +
 cpp/src/arrow/array/array_base.cc                |   5 +-
 cpp/src/arrow/chunk_resolver.cc                  |  69 +++++++++++++++
 cpp/src/arrow/chunk_resolver.h                   | 104 +++++++++++++++++++++++
 cpp/src/arrow/chunked_array.cc                   |  21 ++---
 cpp/src/arrow/chunked_array.h                    |   4 +-
 cpp/src/arrow/chunked_array_test.cc              |   2 +-
 cpp/src/arrow/compute/kernels/chunked_internal.h |  99 +++------------------
 cpp/src/arrow/compute/kernels/vector_sort.cc     |  16 ++--
 python/pyarrow/array.pxi                         |   2 +-
 python/pyarrow/includes/libarrow.pxd             |   1 +
 python/pyarrow/io.pxi                            |   2 +-
 python/pyarrow/table.pxi                         |  21 ++---
 13 files changed, 222 insertions(+), 125 deletions(-)

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index b6f1e2481f..2933457287 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -158,6 +158,7 @@ set(ARROW_SRCS
     builder.cc
     buffer.cc
     chunked_array.cc
+    chunk_resolver.cc
     compare.cc
     config.cc
     datum.cc
diff --git a/cpp/src/arrow/array/array_base.cc b/cpp/src/arrow/array/array_base.cc
index 11b6b1630c..b36fb0fb94 100644
--- a/cpp/src/arrow/array/array_base.cc
+++ b/cpp/src/arrow/array/array_base.cc
@@ -165,8 +165,9 @@ struct ScalarFromArraySlotImpl {
 
   Result<std::shared_ptr<Scalar>> Finish() && {
     if (index_ >= array_.length()) {
-      return Status::IndexError("tried to refer to element ", index_,
-                                " but array is only ", array_.length(), " long");
+      return Status::IndexError("index with value of ", index_,
+                                " is out-of-bounds for array of length ",
+                                array_.length());
     }
 
     if (array_.IsNull(index_)) {
diff --git a/cpp/src/arrow/chunk_resolver.cc b/cpp/src/arrow/chunk_resolver.cc
new file mode 100644
index 0000000000..4a1ba6d0a3
--- /dev/null
+++ b/cpp/src/arrow/chunk_resolver.cc
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/chunk_resolver.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/record_batch.h"
+
+namespace arrow {
+namespace internal {
+
+namespace {
+template <typename T>
+int64_t GetLength(const T& array) {
+  // General case assumes argument is an Array pointer
+  return array->length();
+}
+
+template <>
+int64_t GetLength<std::shared_ptr<RecordBatch>>(
+    const std::shared_ptr<RecordBatch>& batch) {
+  return batch->num_rows();
+}
+
+template <typename T>
+inline std::vector<int64_t> MakeChunksOffsets(const std::vector<T>& chunks) {
+  std::vector<int64_t> offsets(chunks.size() + 1);
+  int64_t offset = 0;
+  std::transform(chunks.begin(), chunks.end(), offsets.begin(),
+                 [&offset](const T& chunk) {
+                   auto curr_offset = offset;
+                   offset += GetLength(chunk);
+                   return curr_offset;
+                 });
+  offsets[chunks.size()] = offset;
+  return offsets;
+}
+}  // namespace
+
+ChunkResolver::ChunkResolver(const ArrayVector& chunks)
+    : offsets_(MakeChunksOffsets(chunks)), cached_chunk_(0) {}
+
+ChunkResolver::ChunkResolver(const std::vector<const Array*>& chunks)
+    : offsets_(MakeChunksOffsets(chunks)), cached_chunk_(0) {}
+
+ChunkResolver::ChunkResolver(const RecordBatchVector& batches)
+    : offsets_(MakeChunksOffsets(batches)), cached_chunk_(0) {}
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/src/arrow/chunk_resolver.h b/cpp/src/arrow/chunk_resolver.h
new file mode 100644
index 0000000000..f69253b161
--- /dev/null
+++ b/cpp/src/arrow/chunk_resolver.h
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <atomic>
+#include <cstdint>
+#include <vector>
+
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+
+namespace arrow {
+namespace internal {
+
+struct ChunkLocation {
+  int64_t chunk_index, index_in_chunk;
+};
+
+// An object that resolves an array chunk depending on a logical index
+struct ChunkResolver {
+  explicit ChunkResolver(const ArrayVector& chunks);
+
+  explicit ChunkResolver(const std::vector<const Array*>& chunks);
+
+  explicit ChunkResolver(const RecordBatchVector& batches);
+
+  ChunkResolver(ChunkResolver&& other)
+      : offsets_(std::move(other.offsets_)), cached_chunk_(other.cached_chunk_.load()) {}
+
+  ChunkResolver& operator=(ChunkResolver&& other) {
+    offsets_ = std::move(other.offsets_);
+    cached_chunk_.store(other.cached_chunk_.load());
+    return *this;
+  }
+
+  /// \brief Return a ChunkLocation containing the chunk index and in-chunk value index of
+  /// the chunked array at logical index
+  inline ChunkLocation Resolve(const int64_t index) const {
+    // It is common for the algorithms below to make consecutive accesses at
+    // a relatively small distance from each other, hence often falling in
+    // the same chunk.
+    // This is trivial when merging (assuming each side of the merge uses
+    // its own resolver), but also in the inner recursive invocations of
+    // partitioning.
+    if (offsets_.size() <= 1) {
+      return {0, index};
+    }
+    const auto cached_chunk = cached_chunk_.load();
+    const bool cache_hit =
+        (index >= offsets_[cached_chunk] && index < offsets_[cached_chunk + 1]);
+    if (ARROW_PREDICT_TRUE(cache_hit)) {
+      return {cached_chunk, index - offsets_[cached_chunk]};
+    }
+    auto chunk_index = Bisect(index);
+    cached_chunk_.store(chunk_index);
+    return {chunk_index, index - offsets_[chunk_index]};
+  }
+
+ protected:
+  // Find the chunk index corresponding to a value index using binary search
+  inline int64_t Bisect(const int64_t index) const {
+    // Like std::upper_bound(), but hand-written as it can help the compiler.
+    // Search [lo, lo + n)
+    int64_t lo = 0;
+    auto n = static_cast<int64_t>(offsets_.size());
+    while (n > 1) {
+      const int64_t m = n >> 1;
+      const int64_t mid = lo + m;
+      if (static_cast<int64_t>(index) >= offsets_[mid]) {
+        lo = mid;
+        n -= m;
+      } else {
+        n = m;
+      }
+    }
+    return lo;
+  }
+
+ private:
+  // Collection of starting offsets used for binary search
+  std::vector<int64_t> offsets_;
+
+  // Tracks the most recently used chunk index to allow fast
+  // access for consecutive indices corresponding to the same chunk
+  mutable std::atomic<int64_t> cached_chunk_;
+};
+
+}  // namespace internal
+}  // namespace arrow
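
For readers skimming the patch: the resolution strategy above amounts to an
upper_bound-style bisection over cumulative chunk start offsets. A minimal
Python sketch of the same idea (illustrative only, not part of the commit;
it omits the cached-chunk fast path):

    from bisect import bisect_right

    def make_offsets(chunk_lengths):
        # Cumulative start offsets plus total length, e.g. [3, 2, 3] -> [0, 3, 5, 8]
        offsets = [0]
        for length in chunk_lengths:
            offsets.append(offsets[-1] + length)
        return offsets

    def resolve(offsets, index):
        # bisect_right finds the first offset strictly greater than index;
        # the owning chunk is the one just before it.
        chunk_index = bisect_right(offsets, index) - 1
        return chunk_index, index - offsets[chunk_index]

    offsets = make_offsets([3, 2, 3])
    print(resolve(offsets, 4))   # (1, 1): logical index 4 is position 1 of chunk 1
    print(resolve(offsets, 8))   # (3, 0): chunk_index == number of chunks, i.e. out of bounds
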
diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc
index 3074690294..840dd04a5a 100644
--- a/cpp/src/arrow/chunked_array.cc
+++ b/cpp/src/arrow/chunked_array.cc
@@ -43,15 +43,17 @@ class MemoryPool;
 // ChunkedArray methods
 
 ChunkedArray::ChunkedArray(ArrayVector chunks, std::shared_ptr<DataType> type)
-    : chunks_(std::move(chunks)), type_(std::move(type)) {
-  length_ = 0;
-  null_count_ = 0;
-
+    : chunks_(std::move(chunks)),
+      type_(std::move(type)),
+      length_(0),
+      null_count_(0),
+      chunk_resolver_{chunks_} {
   if (type_ == nullptr) {
     ARROW_CHECK_GT(chunks_.size(), 0)
         << "cannot construct ChunkedArray from empty vector and omitted type";
     type_ = chunks_[0]->type();
   }
+
   for (const auto& chunk : chunks_) {
     length_ += chunk->length();
     null_count_ += chunk->null_count();
@@ -147,13 +149,12 @@ bool ChunkedArray::ApproxEquals(const ChunkedArray& other,
 }
 
 Result<std::shared_ptr<Scalar>> ChunkedArray::GetScalar(int64_t index) const {
-  for (const auto& chunk : chunks_) {
-    if (index < chunk->length()) {
-      return chunk->GetScalar(index);
-    }
-    index -= chunk->length();
+  const auto loc = chunk_resolver_.Resolve(index);
+  if (loc.chunk_index >= static_cast<int64_t>(chunks_.size())) {
+    return Status::IndexError("index with value of ", index,
+                              " is out-of-bounds for chunked array of length ", length_);
   }
-  return Status::Invalid("index out of bounds");
+  return chunks_[loc.chunk_index]->GetScalar(loc.index_in_chunk);
 }
 
 std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset, int64_t length) const {
diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h
index 36b2f38ed4..e130a99bbe 100644
--- a/cpp/src/arrow/chunked_array.h
+++ b/cpp/src/arrow/chunked_array.h
@@ -23,6 +23,7 @@
 #include <utility>
 #include <vector>
 
+#include "arrow/chunk_resolver.h"
 #include "arrow/compare.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
@@ -177,11 +178,12 @@ class ARROW_EXPORT ChunkedArray {
 
  protected:
   ArrayVector chunks_;
+  std::shared_ptr<DataType> type_;
   int64_t length_;
   int64_t null_count_;
-  std::shared_ptr<DataType> type_;
 
  private:
+  internal::ChunkResolver chunk_resolver_;
   ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
 };
 
diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc
index a623cd4706..d1dc69de27 100644
--- a/cpp/src/arrow/chunked_array_test.cc
+++ b/cpp/src/arrow/chunked_array_test.cc
@@ -278,7 +278,7 @@ TEST_F(TestChunkedArray, GetScalar) {
   check_scalar(carr, 4, **MakeScalar(ty, 3));
   check_scalar(carr, 6, **MakeScalar(ty, 5));
 
-  ASSERT_RAISES(Invalid, carr.GetScalar(7));
+  ASSERT_RAISES(IndexError, carr.GetScalar(7));
 }
 
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/chunked_internal.h b/cpp/src/arrow/compute/kernels/chunked_internal.h
index b007d6cbfb..69f439fccf 100644
--- a/cpp/src/arrow/compute/kernels/chunked_internal.h
+++ b/cpp/src/arrow/compute/kernels/chunked_internal.h
@@ -19,12 +19,12 @@
 
 #include <algorithm>
 #include <cstdint>
+#include <memory>
 #include <vector>
 
 #include "arrow/array.h"
+#include "arrow/chunk_resolver.h"
 #include "arrow/compute/kernels/codegen_internal.h"
-#include "arrow/record_batch.h"
-#include "arrow/type.h"
 
 namespace arrow {
 namespace compute {
@@ -33,8 +33,8 @@ namespace internal {
 // The target chunk in a chunked array.
 template <typename ArrayType>
 struct ResolvedChunk {
-  using V = GetViewType<typename ArrayType::TypeClass>;
-  using LogicalValueType = typename V::T;
+  using ViewType = GetViewType<typename ArrayType::TypeClass>;
+  using LogicalValueType = typename ViewType::T;
 
   // The target array in chunked array.
   const ArrayType* array;
@@ -45,7 +45,7 @@ struct ResolvedChunk {
 
   bool IsNull() const { return array->IsNull(index); }
 
-  LogicalValueType Value() const { return V::LogicalValue(array->GetView(index)); }
+  LogicalValueType Value() const { return ViewType::LogicalValue(array->GetView(index)); }
 };
 
 // ResolvedChunk specialization for untyped arrays when all is needed is null lookup
@@ -61,97 +61,20 @@ struct ResolvedChunk<Array> {
   bool IsNull() const { return array->IsNull(index); }
 };
 
-struct ChunkLocation {
-  int64_t chunk_index, index_in_chunk;
-};
-
-// An object that resolves an array chunk depending on the index.
-struct ChunkResolver {
-  explicit ChunkResolver(std::vector<int64_t> lengths)
-      : num_chunks_(static_cast<int64_t>(lengths.size())),
-        offsets_(MakeEndOffsets(std::move(lengths))),
-        cached_chunk_(0) {}
-
-  ChunkLocation Resolve(int64_t index) const {
-    // It is common for the algorithms below to make consecutive accesses at
-    // a relatively small distance from each other, hence often falling in
-    // the same chunk.
-    // This is trivial when merging (assuming each side of the merge uses
-    // its own resolver), but also in the inner recursive invocations of
-    // partitioning.
-    const bool cache_hit =
-        (index >= offsets_[cached_chunk_] && index < offsets_[cached_chunk_ + 1]);
-    if (ARROW_PREDICT_TRUE(cache_hit)) {
-      return {cached_chunk_, index - offsets_[cached_chunk_]};
-    } else {
-      return ResolveMissBisect(index);
-    }
-  }
-
-  static ChunkResolver FromBatches(const RecordBatchVector& batches) {
-    std::vector<int64_t> lengths(batches.size());
-    std::transform(
-        batches.begin(), batches.end(), lengths.begin(),
-        [](const std::shared_ptr<RecordBatch>& batch) { return batch->num_rows(); });
-    return ChunkResolver(std::move(lengths));
-  }
-
- protected:
-  ChunkLocation ResolveMissBisect(int64_t index) const {
-    // Like std::upper_bound(), but hand-written as it can help the compiler.
-    const int64_t* raw_offsets = offsets_.data();
-    // Search [lo, lo + n)
-    int64_t lo = 0, n = num_chunks_;
-    while (n > 1) {
-      int64_t m = n >> 1;
-      int64_t mid = lo + m;
-      if (index >= raw_offsets[mid]) {
-        lo = mid;
-        n -= m;
-      } else {
-        n = m;
-      }
-    }
-    cached_chunk_ = lo;
-    return {lo, index - offsets_[lo]};
-  }
+struct ChunkedArrayResolver : protected ::arrow::internal::ChunkResolver {
+  ChunkedArrayResolver(const ChunkedArrayResolver& other)
+      : ::arrow::internal::ChunkResolver(other.chunks_), chunks_(other.chunks_) {}
 
-  static std::vector<int64_t> MakeEndOffsets(std::vector<int64_t> lengths) {
-    int64_t offset = 0;
-    for (auto& v : lengths) {
-      const auto this_length = v;
-      v = offset;
-      offset += this_length;
-    }
-    lengths.push_back(offset);
-    return lengths;
-  }
-
-  int64_t num_chunks_;
-  std::vector<int64_t> offsets_;
-
-  mutable int64_t cached_chunk_;
-};
-
-struct ChunkedArrayResolver : protected ChunkResolver {
   explicit ChunkedArrayResolver(const std::vector<const Array*>& chunks)
-      : ChunkResolver(MakeLengths(chunks)), chunks_(chunks) {}
+      : ::arrow::internal::ChunkResolver(chunks), chunks_(chunks) {}
 
   template <typename ArrayType>
   ResolvedChunk<ArrayType> Resolve(int64_t index) const {
-    const auto loc = ChunkResolver::Resolve(index);
-    return ResolvedChunk<ArrayType>(
-        checked_cast<const ArrayType*>(chunks_[loc.chunk_index]), loc.index_in_chunk);
+    const auto loc = ::arrow::internal::ChunkResolver::Resolve(index);
+    return {checked_cast<const ArrayType*>(chunks_[loc.chunk_index]), loc.index_in_chunk};
   }
 
  protected:
-  static std::vector<int64_t> MakeLengths(const std::vector<const Array*>& chunks) {
-    std::vector<int64_t> lengths(chunks.size());
-    std::transform(chunks.begin(), chunks.end(), lengths.begin(),
-                   [](const Array* arr) { return arr->length(); });
-    return lengths;
-  }
-
   const std::vector<const Array*> chunks_;
 };
 
diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc
index 1f62ce5b42..ad3323199a 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort.cc
@@ -27,7 +27,9 @@
 
 #include "arrow/array/concatenate.h"
 #include "arrow/array/data.h"
+#include "arrow/chunk_resolver.h"
 #include "arrow/compute/api_vector.h"
+#include "arrow/compute/kernels/chunked_internal.h"
 #include "arrow/compute/kernels/common.h"
 #include "arrow/compute/kernels/util_internal.h"
 #include "arrow/compute/kernels/vector_sort_internal.h"
@@ -849,10 +851,10 @@ class TableSorter {
           order(order),
           null_count(null_count) {}
 
-    using LocationType = ChunkLocation;
+    using LocationType = ::arrow::internal::ChunkLocation;
 
     template <typename ArrayType>
-    ResolvedChunk<ArrayType> GetChunk(ChunkLocation loc) const {
+    ResolvedChunk<ArrayType> GetChunk(::arrow::internal::ChunkLocation loc) const {
       return {checked_cast<const ArrayType*>(chunks[loc.chunk_index]),
               loc.index_in_chunk};
     }
@@ -895,8 +897,8 @@ class TableSorter {
         batches_(MakeBatches(table, &status_)),
         options_(options),
         null_placement_(options.null_placement),
-        left_resolver_(ChunkResolver::FromBatches(batches_)),
-        right_resolver_(ChunkResolver::FromBatches(batches_)),
+        left_resolver_(batches_),
+        right_resolver_(batches_),
         sort_keys_(ResolveSortKeys(table, batches_, options.sort_keys, &status_)),
         indices_begin_(indices_begin),
         indices_end_(indices_end),
@@ -1137,7 +1139,7 @@ class TableSorter {
   const RecordBatchVector batches_;
   const SortOptions& options_;
   const NullPlacement null_placement_;
-  const ChunkResolver left_resolver_, right_resolver_;
+  const ::arrow::internal::ChunkResolver left_resolver_, right_resolver_;
   const std::vector<ResolvedSortKey> sort_keys_;
   uint64_t* indices_begin_;
   uint64_t* indices_end_;
@@ -1671,9 +1673,8 @@ class TableSelecter : public TypeVisitor {
         : order(order),
           type(GetPhysicalType(chunked_array->type())),
           chunks(GetPhysicalChunks(*chunked_array, type)),
-          chunk_pointers(GetArrayPointers(chunks)),
           null_count(chunked_array->null_count()),
-          resolver(chunk_pointers) {}
+          resolver(GetArrayPointers(chunks)) {}
 
     using LocationType = int64_t;
 
@@ -1687,7 +1688,6 @@ class TableSelecter : public TypeVisitor {
     const SortOrder order;
     const std::shared_ptr<DataType> type;
     const ArrayVector chunks;
-    const std::vector<const Array*> chunk_pointers;
     const int64_t null_count;
     const ChunkedArrayResolver resolver;
   };
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 05aeff1325..9c5069c2d2 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1278,7 +1278,7 @@ cdef class Array(_PandasConvertible):
         -------
         value : Scalar (index) or Array (slice)
         """
-        if PySlice_Check(key):
+        if isinstance(key, slice):
             return _normalize_slice(self, key)
 
         return self.getitem(_normalize_index(key, self.length()))
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 55935feba6..c8e945dc14 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -754,6 +754,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
         shared_ptr[CArray] chunk(int i)
         shared_ptr[CDataType] type()
+        CResult[shared_ptr[CScalar]] GetScalar(int64_t index) const
         shared_ptr[CChunkedArray] Slice(int64_t offset, int64_t length) const
         shared_ptr[CChunkedArray] Slice(int64_t offset) const
 
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index c6bf11b2dc..c39fc78d2a 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -1050,7 +1050,7 @@ cdef class Buffer(_Weakrefable):
             return pyarrow_wrap_buffer(parent_buf)
 
     def __getitem__(self, key):
-        if PySlice_Check(key):
+        if isinstance(key, slice):
             if (key.step or 1) != 1:
                 raise IndexError('only slices with step 1 supported')
             return _normalize_slice(self, key)
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index c4e8326652..9bd0553683 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -222,7 +222,7 @@ cdef class ChunkedArray(_PandasConvertible):
         If buffers are shared between arrays then the shared
         portion will only be counted multiple times.
 
-        The dictionary of dictionary arrays will always be counted in their 
+        The dictionary of dictionary arrays will always be counted in their
         entirety even if the array only references a portion of the dictionary.
 
         Examples
@@ -285,19 +285,14 @@ cdef class ChunkedArray(_PandasConvertible):
         -------
         value : Scalar (index) or ChunkedArray (slice)
         """
+
         if isinstance(key, slice):
             return _normalize_slice(self, key)
 
         return self.getitem(_normalize_index(key, self.chunked_array.length()))
 
-    cdef getitem(self, int64_t index):
-        cdef int j
-
-        for j in range(self.num_chunks):
-            if index < self.chunked_array.chunk(j).get().length():
-                return self.chunk(j)[index]
-            else:
-                index -= self.chunked_array.chunk(j).get().length()
+    cdef getitem(self, int64_t i):
+        return Scalar.wrap(GetResultValue(self.chunked_array.GetScalar(i)))
 
     def is_null(self, *, nan_is_null=False):
         """
@@ -1991,8 +1986,8 @@ cdef class RecordBatch(_PandasConvertible):
         """
         if isinstance(key, slice):
             return _normalize_slice(self, key)
-        else:
-            return self.column(key)
+
+        return self.column(key)
 
     def serialize(self, memory_pool=None):
         """
@@ -2819,8 +2814,8 @@ cdef class Table(_PandasConvertible):
         """
         if isinstance(key, slice):
             return _normalize_slice(self, key)
-        else:
-            return self.column(key)
+
+        return self.column(key)
 
     def slice(self, offset=0, length=None):
         """