You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by as...@apache.org on 2015/07/24 22:10:54 UTC

orc git commit: ORC-22. Fixes #3 - Allow InputStreams to set the natural read size for their underlying file system. I've set the local file system to use 128k, but hdfs and webhdfs would both have substantially larger values.

Repository: orc
Updated Branches:
  refs/heads/master 7f55b4537 -> 8971cca55


ORC-22. Fixes #3 - Allow InputStreams to set the natural read size for
their underlying file system. I've set the local file system to use
128k, but hdfs and webhdfs would both have substantially larger values.


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/8971cca5
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/8971cca5
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/8971cca5

Branch: refs/heads/master
Commit: 8971cca55bad96c23f7f31ca2b0f8070a726d1ba
Parents: 7f55b45
Author: Owen O'Malley <om...@apache.org>
Authored: Fri Jul 24 12:11:49 2015 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Fri Jul 24 12:15:03 2015 -0700

----------------------------------------------------------------------
 c++/include/orc/OrcFile.hh |  6 ++++++
 c++/src/Compression.cc     | 16 +++++++---------
 c++/src/Compression.hh     |  6 +++---
 c++/src/OrcFile.cc         |  4 ++++
 c++/src/Reader.cc          | 10 +++-------
 tools/test/TestReader.cc   |  1 +
 6 files changed, 24 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/8971cca5/c++/include/orc/OrcFile.hh
----------------------------------------------------------------------
diff --git a/c++/include/orc/OrcFile.hh b/c++/include/orc/OrcFile.hh
index a537151..f8f13fb 100644
--- a/c++/include/orc/OrcFile.hh
+++ b/c++/include/orc/OrcFile.hh
@@ -43,6 +43,12 @@ namespace orc {
     virtual uint64_t getLength() const = 0;
 
     /**
+     * Get the natural size for reads.
+     * @return the number of bytes that should be read at once
+     */
+    virtual uint64_t getNaturalReadSize() const = 0;
+
+    /**
      * Read length bytes from the file starting at offset into
      * the buffer starting at buf.
      * @param buf the starting position of a buffer.

http://git-wip-us.apache.org/repos/asf/orc/blob/8971cca5/c++/src/Compression.cc
----------------------------------------------------------------------
diff --git a/c++/src/Compression.cc b/c++/src/Compression.cc
index 81b2c1c..a1b05dd 100644
--- a/c++/src/Compression.cc
+++ b/c++/src/Compression.cc
@@ -69,20 +69,20 @@ namespace orc {
   SeekableArrayInputStream::SeekableArrayInputStream
                (const unsigned char* values,
                 uint64_t size,
-                int64_t blkSize
+                uint64_t blkSize
                 ): data(reinterpret_cast<const char*>(values)) {
     length = size;
     position = 0;
-    blockSize = blkSize == -1 ? length : static_cast<uint64_t>(blkSize);
+    blockSize = blkSize == 0 ? length : static_cast<uint64_t>(blkSize);
   }
 
   SeekableArrayInputStream::SeekableArrayInputStream(const char* values,
                                                      uint64_t size,
-                                                     int64_t blkSize
+                                                     uint64_t blkSize
                                                      ): data(values) {
     length = size;
     position = 0;
-    blockSize = blkSize == -1 ? length : static_cast<uint64_t>(blkSize);
+    blockSize = blkSize == 0 ? length : static_cast<uint64_t>(blkSize);
   }
 
   bool SeekableArrayInputStream::Next(const void** buffer, int*size) {
@@ -135,17 +135,15 @@ namespace orc {
     return result.str();
   }
 
-  static uint64_t computeBlock(int64_t request, uint64_t length) {
-    return std::min(length,
-                    static_cast<uint64_t>(request < 0 ?
-                                          256 * 1024 : request));
+  static uint64_t computeBlock(uint64_t request, uint64_t length) {
+    return std::min(length, request == 0 ? 256 * 1024 : request);
   }
 
   SeekableFileInputStream::SeekableFileInputStream(InputStream* stream,
                                                    uint64_t offset,
                                                    uint64_t byteCount,
                                                    MemoryPool& _pool,
-                                                   int64_t _blockSize
+                                                   uint64_t _blockSize
                                                    ):pool(_pool),
                                                      input(stream),
                                                      start(offset),

http://git-wip-us.apache.org/repos/asf/orc/blob/8971cca5/c++/src/Compression.hh
----------------------------------------------------------------------
diff --git a/c++/src/Compression.hh b/c++/src/Compression.hh
index 2c02584..efd374a 100644
--- a/c++/src/Compression.hh
+++ b/c++/src/Compression.hh
@@ -70,10 +70,10 @@ namespace orc {
   public:
     SeekableArrayInputStream(const unsigned char* list,
                              uint64_t length,
-                             int64_t block_size = -1);
+                             uint64_t block_size = 0);
     SeekableArrayInputStream(const char* list,
                              uint64_t length,
-                             int64_t block_size = -1);
+                             uint64_t block_size = 0);
     virtual ~SeekableArrayInputStream();
     virtual bool Next(const void** data, int*size) override;
     virtual void BackUp(int count) override;
@@ -102,7 +102,7 @@ namespace orc {
                             uint64_t offset,
                             uint64_t byteCount,
                             MemoryPool& pool,
-                            int64_t blockSize = -1);
+                            uint64_t blockSize = 0);
     virtual ~SeekableFileInputStream();
 
     virtual bool Next(const void** data, int*size) override;

http://git-wip-us.apache.org/repos/asf/orc/blob/8971cca5/c++/src/OrcFile.cc
----------------------------------------------------------------------
diff --git a/c++/src/OrcFile.cc b/c++/src/OrcFile.cc
index f8c22c4..d5d00fd 100644
--- a/c++/src/OrcFile.cc
+++ b/c++/src/OrcFile.cc
@@ -57,6 +57,10 @@ namespace orc {
       return totalLength;
     }
 
+    uint64_t getNaturalReadSize() const override {
+      return 128 * 1024;
+    }
+
     void read(void* buf,
               uint64_t length,
               uint64_t offset) override {

http://git-wip-us.apache.org/repos/asf/orc/blob/8971cca5/c++/src/Reader.cc
----------------------------------------------------------------------
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 85f629f..684b9e4 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -1316,10 +1316,7 @@ namespace orc {
                          (new SeekableFileInputStream(stream.get(),
                                                       stripeFooterStart,
                                                       stripeFooterLength,
-                                                      memoryPool,
-                                                      static_cast<int64_t>
-                                                      (blockSize)
-                                                      )),
+                                                      memoryPool)),
                          blockSize,
                          memoryPool);
     proto::StripeFooter result;
@@ -1400,9 +1397,8 @@ namespace orc {
       if (stream.has_kind() &&
           stream.kind() == kind &&
           stream.column() == static_cast<uint64_t>(columnId)) {
-        int64_t myBlock = static_cast<int64_t>(shouldStream ?
-                                         1024 * 1024 :
-                                         stream.length());
+        uint64_t myBlock = shouldStream ? input.getNaturalReadSize():
+          stream.length();
         return createDecompressor(reader.getCompression(),
                                   std::unique_ptr<SeekableInputStream>
                                   (new SeekableFileInputStream

http://git-wip-us.apache.org/repos/asf/orc/blob/8971cca5/tools/test/TestReader.cc
----------------------------------------------------------------------
diff --git a/tools/test/TestReader.cc b/tools/test/TestReader.cc
index 72aa5fd..4d53a62 100644
--- a/tools/test/TestReader.cc
+++ b/tools/test/TestReader.cc
@@ -2914,6 +2914,7 @@ public:
   MOCK_CONST_METHOD0(getLength, uint64_t());
   MOCK_CONST_METHOD0(getName, const std::string&());
   MOCK_METHOD3(read, void (void*, uint64_t, uint64_t));
+  MOCK_CONST_METHOD0(getNaturalReadSize, uint64_t());
 };
 
 MockInputStream::~MockInputStream() {