You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/01/19 20:41:48 UTC

[arrow] branch master updated: ARROW-1930: [C++] Adds Slice operation to ChunkedArray and Column

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 9e4a6e4  ARROW-1930: [C++] Adds Slice operation to ChunkedArray and Column
9e4a6e4 is described below

commit 9e4a6e4baa3dc18380a8173b07bf33f8764bf7ac
Author: Adam Seibert <se...@users.noreply.github.com>
AuthorDate: Fri Jan 19 15:41:43 2018 -0500

    ARROW-1930: [C++] Adds Slice operation to ChunkedArray and Column
    
    Replicates `Slice` api from Array to ChunkedArray and Column.
    
    Author: Adam Seibert <se...@users.noreply.github.com>
    Author: Wes McKinney <we...@twosigma.com>
    
    Closes #1491 from seibs/ARROW-1930 and squashes the following commits:
    
    1f03793b [Wes McKinney] Tweak doxygen comments
    d920d80c [Adam Seibert] ARROW-1930: [C++] Adds Slice operation to ChunkedArray and Column
---
 cpp/src/arrow/table-test.cc | 31 +++++++++++++++++++++++++++++++
 cpp/src/arrow/table.cc      | 24 ++++++++++++++++++++++++
 cpp/src/arrow/table.h       | 36 +++++++++++++++++++++++++++++++++++-
 3 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc
index 3f1c6be..99e4dd5 100644
--- a/cpp/src/arrow/table-test.cc
+++ b/cpp/src/arrow/table-test.cc
@@ -108,6 +108,21 @@ TEST_F(TestChunkedArray, EqualsDifferingLengths) {
   ASSERT_TRUE(one_->Equals(*another_.get()));
 }
 
+TEST_F(TestChunkedArray, SliceEquals) {
+  for (const int chunk_length : {100, 50, 50}) {
+    arrays_one_.push_back(MakeRandomArray<Int32Array>(chunk_length));
+  }
+  Construct();
+  // Slicing the same window twice must produce equal chunked arrays.
+  const auto direct = one_->Slice(125, 50);
+  ASSERT_EQ(direct->length(), 50);
+  ASSERT_TRUE(direct->Equals(one_->Slice(125, 50)));
+  // Chained slices (75 + 25 + 25 == 125) must match the direct slice.
+  const auto chained = one_->Slice(75)->Slice(25)->Slice(25, 50);
+  ASSERT_EQ(chained->length(), 50);
+  ASSERT_TRUE(chained->Equals(direct));
+}
+
 class TestColumn : public TestChunkedArray {
  protected:
   void Construct() override {
@@ -158,6 +173,22 @@ TEST_F(TestColumn, ChunksInhomogeneous) {
   ASSERT_RAISES(Invalid, column_->ValidateData());
 }
 
+TEST_F(TestColumn, SliceEquals) {
+  for (const int chunk_length : {100, 50, 50}) {
+    arrays_one_.push_back(MakeRandomArray<Int32Array>(chunk_length));
+  }
+  one_field_ = field("column", int32());
+  Construct();
+  // Slicing the same window twice must produce equal columns.
+  const auto direct = one_col_->Slice(125, 50);
+  ASSERT_EQ(direct->length(), 50);
+  ASSERT_TRUE(direct->Equals(one_col_->Slice(125, 50)));
+  // Chained slices (75 + 25 + 25 == 125) must match the direct slice.
+  const auto chained = one_col_->Slice(75)->Slice(25)->Slice(25, 50);
+  ASSERT_EQ(chained->length(), 50);
+  ASSERT_TRUE(chained->Equals(direct));
+}
+
 TEST_F(TestColumn, Equals) {
   std::vector<bool> null_bitmap(100, true);
   std::vector<int32_t> data(100, 1);
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 2cf6c26..14877cc 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -102,6 +102,30 @@ bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
   return Equals(*other.get());
 }
 
+std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset, int64_t length) const {
+  DCHECK_LE(offset, length_);
+  // Skip the chunks that lie entirely before `offset`. The bound on curr_chunk
+  // stops offset == length_ (or no chunks at all) from reading past the end.
+  int curr_chunk = 0;
+  while (curr_chunk < num_chunks() && offset >= chunk(curr_chunk)->length()) {
+    offset -= chunk(curr_chunk)->length();
+    curr_chunk++;
+  }
+  // Slice successive chunks until `length` is used up or the chunks run out.
+  ArrayVector new_chunks;
+  while (length > 0 && curr_chunk < num_chunks()) {
+    new_chunks.push_back(chunk(curr_chunk)->Slice(offset, length));
+    length -= chunk(curr_chunk)->length() - offset;
+    offset = 0;
+    curr_chunk++;
+  }
+  return std::make_shared<ChunkedArray>(new_chunks);
+}
+
+std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset) const {
+  return this->Slice(offset, /*length=*/length_);  // total length as the length cap
+}
+
 Column::Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks)
     : field_(field) {
   data_ = std::make_shared<ChunkedArray>(chunks);
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
index c813b32..570a650 100644
--- a/cpp/src/arrow/table.h
+++ b/cpp/src/arrow/table.h
@@ -44,6 +44,7 @@ class ARROW_EXPORT ChunkedArray {
   /// \return the total length of the chunked array; computed on construction
   int64_t length() const { return length_; }
 
+  /// \return the total number of nulls among all chunks
   int64_t null_count() const { return null_count_; }
 
   int num_chunks() const { return static_cast<int>(chunks_.size()); }
@@ -53,6 +54,20 @@ class ARROW_EXPORT ChunkedArray {
 
   const ArrayVector& chunks() const { return chunks_; }
 
+  /// \brief Construct a zero-copy slice of the chunked array with the
+  /// indicated offset and length
+  ///
+  /// \param[in] offset the position of the first element in the constructed
+  /// slice
+  /// \param[in] length the length of the slice. If there are not enough
+  /// elements in the chunked array, the length will be adjusted accordingly
+  ///
+  /// \return a new object wrapped in std::shared_ptr<ChunkedArray>
+  std::shared_ptr<ChunkedArray> Slice(int64_t offset, int64_t length) const;
+
+  /// \brief Slice from offset until end of the chunked array
+  std::shared_ptr<ChunkedArray> Slice(int64_t offset) const;
+
   std::shared_ptr<DataType> type() const;
 
   bool Equals(const ChunkedArray& other) const;
@@ -67,8 +82,9 @@ class ARROW_EXPORT ChunkedArray {
   ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
 };
 
+/// \class Column
 /// \brief An immutable column data structure consisting of a field (type
-/// metadata) and a logical chunked data array
+/// metadata) and a chunked data array
 class ARROW_EXPORT Column {
  public:
   Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks);
@@ -97,6 +113,24 @@ class ARROW_EXPORT Column {
   /// \return the column's data as a chunked logical array
   std::shared_ptr<ChunkedArray> data() const { return data_; }
 
+  /// \brief Zero-copy slice of `length` elements starting at `offset`
+  ///
+  /// \param[in] offset index of the first element of the slice
+  /// \param[in] length requested number of elements; adjusted when the column
+  /// does not hold enough elements past offset
+  ///
+  /// \return a new Column pairing this column's field with the sliced data
+  std::shared_ptr<Column> Slice(int64_t offset, int64_t length) const {
+    std::shared_ptr<ChunkedArray> sliced_data = data_->Slice(offset, length);
+    return std::make_shared<Column>(field_, sliced_data);
+  }
+
+  /// \brief Zero-copy slice of every element from `offset` to the column's end
+  std::shared_ptr<Column> Slice(int64_t offset) const {
+    std::shared_ptr<ChunkedArray> sliced_data = data_->Slice(offset);
+    return std::make_shared<Column>(field_, sliced_data);
+  }
+
   bool Equals(const Column& other) const;
   bool Equals(const std::shared_ptr<Column>& other) const;
 

-- 
To stop receiving notification emails like this one, please contact
['"commits@arrow.apache.org" <co...@arrow.apache.org>'].