You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2023/04/28 15:31:45 UTC

[orc] branch main updated: ORC-1408: [C++] Add `testVectorBatchHasNull` test case and comment

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new b11ab0abe ORC-1408: [C++] Add `testVectorBatchHasNull` test case and comment
b11ab0abe is described below

commit b11ab0abeed02ebf09856c3b6223dd690c68fff3
Author: kaka11chen <ka...@gmail.com>
AuthorDate: Fri Apr 28 08:31:39 2023 -0700

    ORC-1408: [C++] Add `testVectorBatchHasNull` test case and comment
    
    ### What changes were proposed in this pull request?
    
    Add a new test case `testVectorBatchHasNull` and more comments.
    
    ### Why are the changes needed?
    
    To improve test coverage.
    
    ### How was this patch tested?
    
    Pass the CIs.
    
    Closes #1469 from kaka11chen/ORC-1408.
    
    Authored-by: kaka11chen <ka...@gmail.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 c++/include/orc/Vector.hh    |  5 +++
 c++/test/TestColumnReader.cc | 88 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+)

diff --git a/c++/include/orc/Vector.hh b/c++/include/orc/Vector.hh
index 5e65314fb..f6419e93e 100644
--- a/c++/include/orc/Vector.hh
+++ b/c++/include/orc/Vector.hh
@@ -37,6 +37,11 @@ namespace orc {
    * The base class for each of the column vectors. This class handles
    * the generic attributes such as number of elements, capacity, and
    * notNull vector.
+   * Note: If hasNulls is false, the values in the notNull buffer are not required.
+   * On the writer side, it does not read values from notNull buffer so users are
+   * not expected to write notNull buffer if hasNulls is false. On the reader side,
+   * it does not set notNull buffer if hasNulls is false, meaning that it is undefined
+   * behavior to consume values from notNull buffer in this case by downstream users.
    */
   struct ColumnVectorBatch {
     ColumnVectorBatch(uint64_t capacity, MemoryPool& pool);
diff --git a/c++/test/TestColumnReader.cc b/c++/test/TestColumnReader.cc
index ec02cabe9..98f2d86bd 100644
--- a/c++/test/TestColumnReader.cc
+++ b/c++/test/TestColumnReader.cc
@@ -4158,6 +4158,94 @@ namespace orc {
     }
   }
 
+  TEST(TestColumnReader, testVectorBatchHasNull) {
+    // One StructVectorBatch holding a single long field; both reads below reuse it.
+    LongVectorBatch* longBatch = new LongVectorBatch(1024, *getDefaultPool());
+    StructVectorBatch batch(1024, *getDefaultPool());
+    batch.fields.push_back(longBatch);
+
+    // Row type: a struct with one INT field named "myInt".
+    std::unique_ptr<Type> rowType = createStructType();
+    rowType->addStructField("myInt", createPrimitiveType(INT));
+
+    // First read: column 1 has a PRESENT stream, so longBatch must report hasNulls == true.
+    {
+      MockStripeStreams streams;
+
+      // Mark both column ids (0 and 1) as selected.
+      std::vector<bool> selectedColumns(2, true);
+
+      EXPECT_CALL(streams, getSelectedColumns()).WillRepeatedly(testing::Return(selectedColumns));
+
+      // Every column reports DIRECT encoding.
+      proto::ColumnEncoding directEncoding;
+      directEncoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
+      EXPECT_CALL(streams, getEncoding(testing::_)).WillRepeatedly(testing::Return(directEncoding));
+
+      // Column 0 gets nullptr for PRESENT; column 1 gets in-memory PRESENT and DATA streams.
+      EXPECT_CALL(streams, getStreamProxy(0, proto::Stream_Kind_PRESENT, true))
+          .WillRepeatedly(testing::Return(nullptr));
+      const unsigned char buffer1[] = {0x16, 0xf0};
+      EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_PRESENT, true))
+          .WillRepeatedly(
+              testing::Return(new SeekableArrayInputStream(buffer1, ARRAY_SIZE(buffer1))));
+      const unsigned char buffer2[] = {0x32, 0x01, 0x00};
+      EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_DATA, true))
+          .WillRepeatedly(
+              testing::Return(new SeekableArrayInputStream(buffer2, ARRAY_SIZE(buffer2))));
+
+      std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
+
+      reader->next(batch, 100, 0);
+      ASSERT_EQ(100, batch.numElements);
+      ASSERT_EQ(true, !batch.hasNulls);
+      ASSERT_EQ(100, longBatch->numElements);
+      ASSERT_EQ(true, longBatch->hasNulls);
+      long next = 0;  // expected value of the next non-null row
+      for (size_t i = 0; i < batch.numElements; ++i) {
+        if (i & 4) {  // bit 2 set: rows 4-7 of each group of 8 are expected to be null
+          EXPECT_EQ(0, longBatch->notNull[i]);
+        } else {
+          EXPECT_EQ(1, longBatch->notNull[i]);
+          EXPECT_EQ(next++, longBatch->data[i]);
+        }
+      }
+    }
+
+    // Second read into the same batch: no PRESENT stream, so hasNulls must now be false.
+    {
+      MockStripeStreams streams;
+
+      // Mark both column ids (0 and 1) as selected.
+      std::vector<bool> selectedColumns(2, true);
+
+      EXPECT_CALL(streams, getSelectedColumns()).WillRepeatedly(testing::Return(selectedColumns));
+
+      // Every column reports DIRECT encoding.
+      proto::ColumnEncoding directEncoding;
+      directEncoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
+      EXPECT_CALL(streams, getEncoding(testing::_)).WillRepeatedly(testing::Return(directEncoding));
+
+      // Both columns get nullptr for PRESENT; column 1 only supplies a DATA stream.
+      EXPECT_CALL(streams, getStreamProxy(0, proto::Stream_Kind_PRESENT, true))
+          .WillRepeatedly(testing::Return(nullptr));
+      EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_PRESENT, true))
+          .WillRepeatedly(testing::Return(nullptr));
+      const unsigned char buffer[] = {0x64, 0x01, 0x00};
+      EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_DATA, true))
+          .WillRepeatedly(
+              testing::Return(new SeekableArrayInputStream(buffer, ARRAY_SIZE(buffer))));
+
+      std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
+
+      reader->next(batch, 100, 0);
+      ASSERT_EQ(100, batch.numElements);
+      ASSERT_EQ(true, !batch.hasNulls);
+      ASSERT_EQ(100, longBatch->numElements);
+      ASSERT_EQ(true, !longBatch->hasNulls);
+    }
+  }
+
   INSTANTIATE_TEST_SUITE_P(OrcColumnReaderTest, TestColumnReaderEncoded, Values(true, false));
 
 }  // namespace orc