You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2023/04/28 15:31:45 UTC
[orc] branch main updated: ORC-1408: [C++] Add `testVectorBatchHasNull` test case and comment
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new b11ab0abe ORC-1408: [C++] Add `testVectorBatchHasNull` test case and comment
b11ab0abe is described below
commit b11ab0abeed02ebf09856c3b6223dd690c68fff3
Author: kaka11chen <ka...@gmail.com>
AuthorDate: Fri Apr 28 08:31:39 2023 -0700
ORC-1408: [C++] Add `testVectorBatchHasNull` test case and comment
### What changes were proposed in this pull request?
Add a new test case `testVectorBatchHasNull` and more comments.
### Why are the changes needed?
To improve test coverage.
### How was this patch tested?
Pass the CIs.
Closes #1469 from kaka11chen/ORC-1408.
Authored-by: kaka11chen <ka...@gmail.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
c++/include/orc/Vector.hh | 5 +++
c++/test/TestColumnReader.cc | 88 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 93 insertions(+)
diff --git a/c++/include/orc/Vector.hh b/c++/include/orc/Vector.hh
index 5e65314fb..f6419e93e 100644
--- a/c++/include/orc/Vector.hh
+++ b/c++/include/orc/Vector.hh
@@ -37,6 +37,11 @@ namespace orc {
* The base class for each of the column vectors. This class handles
* the generic attributes such as number of elements, capacity, and
* notNull vector.
+ * Note: If hasNulls is false, the values in the notNull buffer are not required.
+ * On the writer side, it does not read values from notNull buffer so users are
+ * not expected to write notNull buffer if hasNulls is false. On the reader side,
+ * it does not set notNull buffer if hasNulls is false, meaning that it is undefined
+ * behavior to consume values from notNull buffer in this case by downstream users.
*/
struct ColumnVectorBatch {
ColumnVectorBatch(uint64_t capacity, MemoryPool& pool);
diff --git a/c++/test/TestColumnReader.cc b/c++/test/TestColumnReader.cc
index ec02cabe9..98f2d86bd 100644
--- a/c++/test/TestColumnReader.cc
+++ b/c++/test/TestColumnReader.cc
@@ -4158,6 +4158,94 @@ namespace orc {
}
}
+ TEST(TestColumnReader, testVectorBatchHasNull) {
+ // Both reads below reuse this one StructVectorBatch; hasNulls is asserted after each read.
+ LongVectorBatch* longBatch = new LongVectorBatch(1024, *getDefaultPool());
+ StructVectorBatch batch(1024, *getDefaultPool());
+ batch.fields.push_back(longBatch);
+
+ // create the row type: a struct with a single INT field named "myInt"
+ std::unique_ptr<Type> rowType = createStructType();
+ rowType->addStructField("myInt", createPrimitiveType(INT));
+
+ // Case 1: read integer with nulls (column 1 has a PRESENT stream)
+ {
+ MockStripeStreams streams;
+
+ // set getSelectedColumns(): two entries, both selected
+ std::vector<bool> selectedColumns(2, true);
+
+ EXPECT_CALL(streams, getSelectedColumns()).WillRepeatedly(testing::Return(selectedColumns));
+
+ // set getEncoding: every column reports DIRECT encoding
+ proto::ColumnEncoding directEncoding;
+ directEncoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
+ EXPECT_CALL(streams, getEncoding(testing::_)).WillRepeatedly(testing::Return(directEncoding));
+
+ // set getStream: column 0 has no PRESENT stream; column 1 has PRESENT (buffer1) and DATA (buffer2)
+ EXPECT_CALL(streams, getStreamProxy(0, proto::Stream_Kind_PRESENT, true))
+ .WillRepeatedly(testing::Return(nullptr));
+ const unsigned char buffer1[] = {0x16, 0xf0};  // loop below expects rows with (i & 4) set to be null
+ EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_PRESENT, true))
+ .WillRepeatedly(
+ testing::Return(new SeekableArrayInputStream(buffer1, ARRAY_SIZE(buffer1))));
+ const unsigned char buffer2[] = {0x32, 0x01, 0x00};  // loop below expects non-null values 0, 1, 2, ...
+ EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_DATA, true))
+ .WillRepeatedly(
+ testing::Return(new SeekableArrayInputStream(buffer2, ARRAY_SIZE(buffer2))));
+
+ std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
+
+ reader->next(batch, 100, 0);
+ ASSERT_EQ(100, batch.numElements);
+ ASSERT_EQ(true, !batch.hasNulls);  // the struct level reports no nulls
+ ASSERT_EQ(100, longBatch->numElements);
+ ASSERT_EQ(true, longBatch->hasNulls);  // the child column reports nulls
+ long next = 0;  // next expected non-null data value
+ for (size_t i = 0; i < batch.numElements; ++i) {
+ if (i & 4) {
+ EXPECT_EQ(0, longBatch->notNull[i]);
+ } else {
+ EXPECT_EQ(1, longBatch->notNull[i]);
+ EXPECT_EQ(next++, longBatch->data[i]);
+ }
+ }
+ }
+
+ // Case 2: read no-null integers without PRESENT stream, into the same batch objects as case 1.
+ {
+ MockStripeStreams streams;
+
+ // set getSelectedColumns(): two entries, both selected
+ std::vector<bool> selectedColumns(2, true);
+
+ EXPECT_CALL(streams, getSelectedColumns()).WillRepeatedly(testing::Return(selectedColumns));
+
+ // set getEncoding: every column reports DIRECT encoding
+ proto::ColumnEncoding directEncoding;
+ directEncoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
+ EXPECT_CALL(streams, getEncoding(testing::_)).WillRepeatedly(testing::Return(directEncoding));
+
+ // set getStream: neither column has a PRESENT stream; column 1 only has a DATA stream
+ EXPECT_CALL(streams, getStreamProxy(0, proto::Stream_Kind_PRESENT, true))
+ .WillRepeatedly(testing::Return(nullptr));
+ EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_PRESENT, true))
+ .WillRepeatedly(testing::Return(nullptr));
+ const unsigned char buffer[] = {0x64, 0x01, 0x00};
+ EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_DATA, true))
+ .WillRepeatedly(
+ testing::Return(new SeekableArrayInputStream(buffer, ARRAY_SIZE(buffer))));
+
+ std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
+
+ reader->next(batch, 100, 0);
+ ASSERT_EQ(100, batch.numElements);
+ ASSERT_EQ(true, !batch.hasNulls);
+ ASSERT_EQ(100, longBatch->numElements);
+ ASSERT_EQ(true, !longBatch->hasNulls);  // notNull contents are not checked in this case
+ }
+ }
+
INSTANTIATE_TEST_SUITE_P(OrcColumnReaderTest, TestColumnReaderEncoded, Values(true, false));
} // namespace orc