You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/01/19 20:41:48 UTC
[arrow] branch master updated: ARROW-1930: [C++] Adds Slice
operation to ChunkedArray and Column
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 9e4a6e4 ARROW-1930: [C++] Adds Slice operation to ChunkedArray and Column
9e4a6e4 is described below
commit 9e4a6e4baa3dc18380a8173b07bf33f8764bf7ac
Author: Adam Seibert <se...@users.noreply.github.com>
AuthorDate: Fri Jan 19 15:41:43 2018 -0500
ARROW-1930: [C++] Adds Slice operation to ChunkedArray and Column
Replicates the `Slice` API from Array to ChunkedArray and Column.
Author: Adam Seibert <se...@users.noreply.github.com>
Author: Wes McKinney <we...@twosigma.com>
Closes #1491 from seibs/ARROW-1930 and squashes the following commits:
1f03793b [Wes McKinney] Tweak doxygen comments
d920d80c [Adam Seibert] ARROW-1930: [C++] Adds Slice operation to ChunkedArray and Column
---
cpp/src/arrow/table-test.cc | 31 +++++++++++++++++++++++++++++++
cpp/src/arrow/table.cc | 24 ++++++++++++++++++++++++
cpp/src/arrow/table.h | 36 +++++++++++++++++++++++++++++++++++-
3 files changed, 90 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc
index 3f1c6be..99e4dd5 100644
--- a/cpp/src/arrow/table-test.cc
+++ b/cpp/src/arrow/table-test.cc
@@ -108,6 +108,21 @@ TEST_F(TestChunkedArray, EqualsDifferingLengths) {
ASSERT_TRUE(one_->Equals(*another_.get()));
}
+// A slice taken directly and the same logical range reached through a chain
+// of slices must have the same length and compare equal.
+TEST_F(TestChunkedArray, SliceEquals) {
+  // Three chunks holding 100, 50 and 50 elements (200 in total).
+  // NOTE(review): Construct() presumably builds one_ from arrays_one_ --
+  // the fixture is not visible here.
+  arrays_one_.push_back(MakeRandomArray<Int32Array>(100));
+  arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
+  arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
+  Construct();
+
+  // Elements [125, 175): begins inside the second chunk, ends inside the third.
+  const std::shared_ptr<ChunkedArray> direct = one_->Slice(125, 50);
+  ASSERT_EQ(direct->length(), 50);
+  const std::shared_ptr<ChunkedArray> direct_again = one_->Slice(125, 50);
+  ASSERT_TRUE(direct->Equals(direct_again));
+
+  // Offsets 75 + 25 + 25 add up to 125, so the chain lands on the same range.
+  const std::shared_ptr<ChunkedArray> from_75 = one_->Slice(75);
+  const std::shared_ptr<ChunkedArray> from_100 = from_75->Slice(25);
+  const std::shared_ptr<ChunkedArray> chained = from_100->Slice(25, 50);
+  ASSERT_EQ(chained->length(), 50);
+  ASSERT_TRUE(chained->Equals(direct));
+}
+
class TestColumn : public TestChunkedArray {
protected:
void Construct() override {
@@ -158,6 +173,22 @@ TEST_F(TestColumn, ChunksInhomogeneous) {
ASSERT_RAISES(Invalid, column_->ValidateData());
}
+// Column counterpart of the chunked-array slice test: a direct slice and a
+// chained slice over the same logical range must compare equal.
+TEST_F(TestColumn, SliceEquals) {
+  // Three chunks holding 100, 50 and 50 elements (200 in total).
+  // NOTE(review): Construct() presumably builds one_col_ from one_field_ and
+  // arrays_one_ -- the fixture override is only partly visible here.
+  arrays_one_.push_back(MakeRandomArray<Int32Array>(100));
+  arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
+  arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
+  one_field_ = field("column", int32());
+  Construct();
+
+  // Elements [125, 175): begins inside the second chunk, ends inside the third.
+  const std::shared_ptr<Column> direct = one_col_->Slice(125, 50);
+  ASSERT_EQ(direct->length(), 50);
+  const std::shared_ptr<Column> direct_again = one_col_->Slice(125, 50);
+  ASSERT_TRUE(direct->Equals(direct_again));
+
+  // Offsets 75 + 25 + 25 add up to 125, so the chain lands on the same range.
+  const std::shared_ptr<Column> from_75 = one_col_->Slice(75);
+  const std::shared_ptr<Column> from_100 = from_75->Slice(25);
+  const std::shared_ptr<Column> chained = from_100->Slice(25, 50);
+  ASSERT_EQ(chained->length(), 50);
+  ASSERT_TRUE(chained->Equals(direct));
+}
+
TEST_F(TestColumn, Equals) {
std::vector<bool> null_bitmap(100, true);
std::vector<int32_t> data(100, 1);
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 2cf6c26..14877cc 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -102,6 +102,30 @@ bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
return Equals(*other.get());
}
+/// \brief Construct a slice of this chunked array out of slices of its chunks
+///
+/// \param[in] offset logical position of the first element of the slice; must
+/// not exceed length_ (enforced with DCHECK_LE)
+/// \param[in] length requested number of elements; the result is shorter when
+/// the chunks run out first
+/// \return a new ChunkedArray holding the per-chunk slices
+std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset, int64_t length) const {
+  DCHECK_LE(offset, length_);
+
+  // Skip every chunk that lies entirely before `offset`, turning `offset`
+  // into a position relative to the first chunk that contributes. The bound on
+  // curr_chunk is required: offset == length_ passes the check above (and an
+  // array with no chunks has nothing to skip), and without the bound chunk()
+  // would be called with an index past the last chunk.
+  int curr_chunk = 0;
+  while (curr_chunk < num_chunks() && offset >= chunk(curr_chunk)->length()) {
+    offset -= chunk(curr_chunk)->length();
+    curr_chunk++;
+  }
+
+  // Take a piece of each remaining chunk until `length` is used up. Only the
+  // first piece starts at a non-zero offset. The per-chunk Slice call is
+  // relied on to clamp `length` to what that chunk still holds.
+  // NOTE(review): when nothing is selected the result is built from an empty
+  // chunk vector -- confirm the ChunkedArray constructor and type() cope.
+  ArrayVector new_chunks;
+  while (length > 0 && curr_chunk < num_chunks()) {
+    new_chunks.push_back(chunk(curr_chunk)->Slice(offset, length));
+    length -= chunk(curr_chunk)->length() - offset;
+    offset = 0;
+    curr_chunk++;
+  }
+
+  return std::make_shared<ChunkedArray>(new_chunks);
+}
+
+/// \brief Slice from `offset` through the end of the chunked array
+std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset) const {
+  // Requesting the full length_ is always enough to reach the end; the
+  // two-argument overload stops once there are no more chunks.
+  const int64_t requested_length = length_;
+  return this->Slice(offset, requested_length);
+}
+
Column::Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks)
: field_(field) {
data_ = std::make_shared<ChunkedArray>(chunks);
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
index c813b32..570a650 100644
--- a/cpp/src/arrow/table.h
+++ b/cpp/src/arrow/table.h
@@ -44,6 +44,7 @@ class ARROW_EXPORT ChunkedArray {
/// \return the total length of the chunked array; computed on construction
int64_t length() const { return length_; }
+ /// \return the total number of nulls among all chunks
int64_t null_count() const { return null_count_; }
int num_chunks() const { return static_cast<int>(chunks_.size()); }
@@ -53,6 +54,20 @@ class ARROW_EXPORT ChunkedArray {
const ArrayVector& chunks() const { return chunks_; }
+ /// \brief Construct a zero-copy slice of the chunked array with the
+ /// indicated offset and length
+ ///
+ /// \param[in] offset the position of the first element in the constructed
+ /// slice
+ /// \param[in] length the length of the slice. If there are not enough
+ /// elements in the chunked array, the length will be adjusted accordingly
+ ///
+ /// \return a new object wrapped in std::shared_ptr<ChunkedArray>
+ std::shared_ptr<ChunkedArray> Slice(int64_t offset, int64_t length) const;
+
+ /// \brief Slice from offset until end of the chunked array
+ std::shared_ptr<ChunkedArray> Slice(int64_t offset) const;
+
std::shared_ptr<DataType> type() const;
bool Equals(const ChunkedArray& other) const;
@@ -67,8 +82,9 @@ class ARROW_EXPORT ChunkedArray {
ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
};
+/// \class Column
/// \brief An immutable column data structure consisting of a field (type
-/// metadata) and a logical chunked data array
+/// metadata) and a chunked data array
class ARROW_EXPORT Column {
public:
Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks);
@@ -97,6 +113,24 @@ class ARROW_EXPORT Column {
/// \return the column's data as a chunked logical array
std::shared_ptr<ChunkedArray> data() const { return data_; }
+ /// \brief Construct a zero-copy slice of the column with the indicated
+ /// offset and length
+ ///
+ /// \param[in] offset the position of the first element in the constructed
+ /// slice
+ /// \param[in] length the length of the slice. If there are not enough
+ /// elements in the column, the length will be adjusted accordingly
+ ///
+ /// \return a new object wrapped in std::shared_ptr<Column>
+ std::shared_ptr<Column> Slice(int64_t offset, int64_t length) const {
+   // Slice the underlying chunked data, then pair it with the same field.
+   std::shared_ptr<ChunkedArray> sliced_data = data_->Slice(offset, length);
+   return std::make_shared<Column>(field_, sliced_data);
+ }
+
+ /// \brief Slice from offset until end of the column
+ std::shared_ptr<Column> Slice(int64_t offset) const {
+   // Slice the underlying chunked data to its end, keeping the same field.
+   std::shared_ptr<ChunkedArray> sliced_data = data_->Slice(offset);
+   return std::make_shared<Column>(field_, sliced_data);
+ }
+
bool Equals(const Column& other) const;
bool Equals(const std::shared_ptr<Column>& other) const;
--
To stop receiving notification emails like this one, please contact
['"commits@arrow.apache.org" <co...@arrow.apache.org>'].