You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2015/08/26 18:57:32 UTC

orc git commit: ORC-25. Clean up Coverity warnings.

Repository: orc
Updated Branches:
  refs/heads/master 891c538a9 -> bec0d82fd


ORC-25. Clean up Coverity warnings.

closes apache/orc#7


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/bec0d82f
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/bec0d82f
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/bec0d82f

Branch: refs/heads/master
Commit: bec0d82fdbec0a5d6dfd312d34121049763d8ca7
Parents: 891c538
Author: Owen O'Malley <om...@apache.org>
Authored: Tue Aug 25 14:45:39 2015 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Wed Aug 26 09:55:31 2015 -0700

----------------------------------------------------------------------
 c++/src/ColumnPrinter.cc    | 44 +++++++++++++++++-------
 c++/src/MemoryPool.cc       |  2 +-
 c++/src/RLEv1.cc            |  5 ++-
 c++/src/RLEv2.cc            |  6 ++--
 c++/src/Reader.cc           | 62 ++++++++++++++++++++++++----------
 c++/src/Vector.cc           |  4 +++
 c++/test/TestCompression.cc |  8 ++++-
 c++/test/TestInt128.cc      |  2 +-
 tools/src/FileContents.cc   | 33 +++++++++---------
 tools/src/FileMetadata.cc   | 72 ++++++++++++++++++++-------------------
 tools/src/FileScan.cc       |  7 ++--
 tools/src/FileStatistics.cc | 43 +++++++++++------------
 tools/test/TestReader.cc    | 73 ++++++++++++++++++++++------------------
 13 files changed, 215 insertions(+), 146 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/src/ColumnPrinter.cc
----------------------------------------------------------------------
diff --git a/c++/src/ColumnPrinter.cc b/c++/src/ColumnPrinter.cc
index 1367b02..aa90be6 100644
--- a/c++/src/ColumnPrinter.cc
+++ b/c++/src/ColumnPrinter.cc
@@ -279,8 +279,9 @@ namespace orc {
 
   LongColumnPrinter::LongColumnPrinter(std::string& buffer,
                                        const Type& type
-                                       ): ColumnPrinter(buffer, type) {
-    // pass
+                                       ): ColumnPrinter(buffer, type),
+                                          data(nullptr) {
+    // PASS
   }
 
   void LongColumnPrinter::reset(const  ColumnVectorBatch& batch) {
@@ -302,6 +303,7 @@ namespace orc {
   DoubleColumnPrinter::DoubleColumnPrinter(std::string& buffer,
                                            const Type& type
                                            ): ColumnPrinter(buffer, type),
+                                              data(nullptr),
                                               isFloat(type.getKind() == FLOAT){
     // PASS
   }
@@ -325,7 +327,9 @@ namespace orc {
   Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& buffer,
                                                  const  Type& type
                                                  ): ColumnPrinter(buffer,
-                                                                  type) {
+                                                                  type),
+                                                    data(nullptr),
+                                                    scale(0) {
     // PASS
   }
 
@@ -375,14 +379,16 @@ namespace orc {
   Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& buffer,
                                                    const Type& type
                                                    ): ColumnPrinter(buffer,
-                                                                    type) {
+                                                                    type),
+                                                      data(nullptr),
+                                                      scale(0) {
      // PASS
    }
 
    void Decimal128ColumnPrinter::reset(const  ColumnVectorBatch& batch) {
      ColumnPrinter::reset(batch);
      data = dynamic_cast<const Decimal128VectorBatch&>(batch).values.data();
-     scale =dynamic_cast<const Decimal128VectorBatch&>(batch).scale;
+     scale = dynamic_cast<const Decimal128VectorBatch&>(batch).scale;
    }
 
    void Decimal128ColumnPrinter::printRow(uint64_t rowId) {
@@ -395,7 +401,9 @@ namespace orc {
 
   StringColumnPrinter::StringColumnPrinter(std::string& buffer,
                                            const Type& type
-                                           ): ColumnPrinter(buffer, type) {
+                                           ): ColumnPrinter(buffer, type),
+                                              start(nullptr),
+                                              length(nullptr) {
     // PASS
   }
 
@@ -445,7 +453,8 @@ namespace orc {
 
   ListColumnPrinter::ListColumnPrinter(std::string& buffer,
                                        const Type& type
-                                       ): ColumnPrinter(buffer, type) {
+                                       ): ColumnPrinter(buffer, type),
+                                          offsets(nullptr) {
     elementPrinter = createColumnPrinter(buffer, type.getSubtype(0));
   }
 
@@ -473,7 +482,8 @@ namespace orc {
 
   MapColumnPrinter::MapColumnPrinter(std::string& buffer,
                                      const Type& type
-                                     ): ColumnPrinter(buffer, type) {
+                                     ): ColumnPrinter(buffer, type),
+                                        offsets(nullptr) {
     keyPrinter = createColumnPrinter(buffer, type.getSubtype(0));
     elementPrinter = createColumnPrinter(buffer, type.getSubtype(1));
   }
@@ -507,7 +517,9 @@ namespace orc {
 
   UnionColumnPrinter::UnionColumnPrinter(std::string& buffer,
                                            const Type& type
-                                           ): ColumnPrinter(buffer, type) {
+                                         ): ColumnPrinter(buffer, type),
+                                            tags(nullptr),
+                                            offsets(nullptr) {
     for(unsigned int i=0; i < type.getSubtypeCount(); ++i) {
       fieldPrinter.push_back(createColumnPrinter(buffer, type.getSubtype(i))
                              .release());
@@ -590,7 +602,8 @@ namespace orc {
 
   DateColumnPrinter::DateColumnPrinter(std::string& buffer,
                                        const Type& type
-                                       ): ColumnPrinter(buffer, type) {
+                                       ): ColumnPrinter(buffer, type),
+                                          data(nullptr) {
     // PASS
   }
 
@@ -616,7 +629,8 @@ namespace orc {
 
   BooleanColumnPrinter::BooleanColumnPrinter(std::string& buffer,
                                              const Type& type
-                                             ): ColumnPrinter(buffer, type) {
+                                             ): ColumnPrinter(buffer, type),
+                                                data(nullptr) {
     // PASS
   }
 
@@ -635,7 +649,9 @@ namespace orc {
 
   BinaryColumnPrinter::BinaryColumnPrinter(std::string& buffer,
                                            const Type& type
-                                           ): ColumnPrinter(buffer, type) {
+                                           ): ColumnPrinter(buffer, type),
+                                              start(nullptr),
+                                              length(nullptr) {
     // PASS
   }
 
@@ -666,7 +682,9 @@ namespace orc {
   TimestampColumnPrinter::TimestampColumnPrinter(std::string& buffer,
                                                  const Type& type
                                                  ): ColumnPrinter(buffer,
-                                                                  type) {
+                                                                  type),
+                                                    seconds(nullptr),
+                                                    nanoseconds(nullptr) {
     // PASS
   }
 

http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/src/MemoryPool.cc
----------------------------------------------------------------------
diff --git a/c++/src/MemoryPool.cc b/c++/src/MemoryPool.cc
index 28cc9e1..7fecf67 100644
--- a/c++/src/MemoryPool.cc
+++ b/c++/src/MemoryPool.cc
@@ -88,7 +88,7 @@ namespace orc {
 
   template <class T>
   void DataBuffer<T>::reserve(uint64_t newCapacity){
-    if (newCapacity > currentCapacity) {
+    if (newCapacity > currentCapacity || !buf) {
       if (buf) {
         T* buf_old = buf;
         buf = reinterpret_cast<T*>(memoryPool.malloc(sizeof(T) * newCapacity));

http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/src/RLEv1.cc
----------------------------------------------------------------------
diff --git a/c++/src/RLEv1.cc b/c++/src/RLEv1.cc
index 91bb79d..0a9a65e 100644
--- a/c++/src/RLEv1.cc
+++ b/c++/src/RLEv1.cc
@@ -86,8 +86,11 @@ RleDecoderV1::RleDecoderV1(std::unique_ptr<SeekableInputStream> input,
     : inputStream(std::move(input)),
       isSigned(hasSigned),
       remainingValues(0),
+      value(0),
       bufferStart(nullptr),
-      bufferEnd(bufferStart) {
+      bufferEnd(bufferStart),
+      delta(0),
+      repeating(false) {
 }
 
 void RleDecoderV1::seek(PositionProvider& location) {

http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/src/RLEv2.cc
----------------------------------------------------------------------
diff --git a/c++/src/RLEv2.cc b/c++/src/RLEv2.cc
index 43428b4..02f325f 100644
--- a/c++/src/RLEv2.cc
+++ b/c++/src/RLEv2.cc
@@ -34,8 +34,7 @@ struct FixedBitSizes {
 };
 
 inline uint32_t decodeBitWidth(uint32_t n) {
-  if (n >= FixedBitSizes::ONE &&
-      n <= FixedBitSizes::TWENTYFOUR) {
+  if (n <= FixedBitSizes::TWENTYFOUR) {
     return n + 1;
   } else if (n == FixedBitSizes::TWENTYSIX) {
     return 26;
@@ -125,8 +124,11 @@ RleDecoderV2::RleDecoderV2(std::unique_ptr<SeekableInputStream> input,
                               bitsLeft(0),
                               curByte(0),
                               patchBitSize(0),
+                              unpackedIdx(0),
+                              patchIdx(0),
                               base(0),
                               curGap(0),
+                              curPatch(0),
                               patchMask(0),
                               actualGap(0),
                               unpacked(pool, 0),

http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/src/Reader.cc
----------------------------------------------------------------------
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index b7cf00d..cb0647d 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -1525,12 +1525,16 @@ namespace orc {
       result = new TimestampVectorBatch(capacity, memoryPool);
       break;
     case STRUCT:
-      result = new StructVectorBatch(capacity, memoryPool);
-      for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
-        subtype = &(type.getSubtype(i));
-        if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
-          dynamic_cast<StructVectorBatch*>(result)->fields.push_back
-            (createRowBatch(*subtype, capacity).release());
+      {
+        StructVectorBatch *structResult =
+          new StructVectorBatch(capacity, memoryPool);
+        result = structResult;
+        for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
+          subtype = &(type.getSubtype(i));
+          if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
+            structResult->fields.push_back(createRowBatch(*subtype,
+                                                          capacity).release());
+          }
         }
       }
       break;
@@ -1563,12 +1567,16 @@ namespace orc {
       }
       break;
     case UNION:
-      result = new UnionVectorBatch(capacity, memoryPool);
-      for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
-        subtype = &(type.getSubtype(i));
-        if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
-          dynamic_cast<UnionVectorBatch*>(result)->children.push_back
-            (createRowBatch(*subtype, capacity).release());
+      {
+        UnionVectorBatch *unionResult =
+          new UnionVectorBatch(capacity, memoryPool);
+        result = unionResult;
+        for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
+          subtype = &(type.getSubtype(i));
+          if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
+            unionResult->children.push_back(createRowBatch(*subtype,
+                                                          capacity).release());
+          }
         }
       }
       break;
@@ -1804,6 +1812,8 @@ namespace orc {
     valueCount = pb.numberofvalues();
     if (!pb.has_binarystatistics() || !correctStats) {
       _hasTotalLength = false;
+
+      totalLength = 0;
     }else{
       _hasTotalLength = pb.binarystatistics().has_sum();
       totalLength = static_cast<uint64_t>(pb.binarystatistics().sum());
@@ -1815,6 +1825,7 @@ namespace orc {
     valueCount = pb.numberofvalues();
     if (!pb.has_bucketstatistics() || !correctStats) {
       _hasCount = false;
+      trueCount = 0;
     }else{
       _hasCount = true;
       trueCount = pb.bucketstatistics().count(0);
@@ -1827,11 +1838,14 @@ namespace orc {
     if (!pb.has_datestatistics() || !correctStats) {
       _hasMinimum = false;
       _hasMaximum = false;
-    }else{
-        _hasMinimum = pb.datestatistics().has_minimum();
-        _hasMaximum = pb.datestatistics().has_maximum();
-        minimum = pb.datestatistics().minimum();
-        maximum = pb.datestatistics().maximum();
+
+      minimum = 0;
+      maximum = 0;
+    } else {
+      _hasMinimum = pb.datestatistics().has_minimum();
+      _hasMaximum = pb.datestatistics().has_maximum();
+      minimum = pb.datestatistics().minimum();
+      maximum = pb.datestatistics().maximum();
     }
   }
 
@@ -1861,6 +1875,10 @@ namespace orc {
       _hasMinimum = false;
       _hasMaximum = false;
       _hasSum = false;
+
+      minimum = 0;
+      maximum = 0;
+      sum = 0;
     }else{
       const proto::DoubleStatistics& stats = pb.doublestatistics();
       _hasMinimum = stats.has_minimum();
@@ -1880,6 +1898,10 @@ namespace orc {
       _hasMinimum = false;
       _hasMaximum = false;
       _hasSum = false;
+
+      minimum = 0;
+      maximum = 0;
+      sum = 0;
     }else{
       const proto::IntegerStatistics& stats = pb.intstatistics();
       _hasMinimum = stats.has_minimum();
@@ -1899,6 +1921,8 @@ namespace orc {
       _hasMinimum = false;
       _hasMaximum = false;
       _hasTotalLength = false;
+      
+      totalLength = 0;
     }else{
       const proto::StringStatistics& stats = pb.stringstatistics();
       _hasMinimum = stats.has_minimum();
@@ -1912,11 +1936,13 @@ namespace orc {
   }
 
   TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl
-  (const proto::ColumnStatistics& pb, bool correctStats){
+  (const proto::ColumnStatistics& pb, bool correctStats) {
     valueCount = pb.numberofvalues();
     if (!pb.has_timestampstatistics() || !correctStats) {
       _hasMinimum = false;
       _hasMaximum = false;
+      minimum = 0;
+      maximum = 0;
     }else{
       const proto::TimestampStatistics& stats = pb.timestampstatistics();
       _hasMinimum = stats.has_minimum();

http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/src/Vector.cc
----------------------------------------------------------------------
diff --git a/c++/src/Vector.cc b/c++/src/Vector.cc
index 55167f4..6c698db 100644
--- a/c++/src/Vector.cc
+++ b/c++/src/Vector.cc
@@ -232,6 +232,8 @@ namespace orc {
 
   Decimal64VectorBatch::Decimal64VectorBatch(uint64_t cap, MemoryPool& pool
                  ): ColumnVectorBatch(cap, pool),
+                    precision(0),
+                    scale(0),
                     values(pool, cap),
                     readScales(pool, cap) {
     // PASS
@@ -258,6 +260,8 @@ namespace orc {
 
   Decimal128VectorBatch::Decimal128VectorBatch(uint64_t cap, MemoryPool& pool
                ): ColumnVectorBatch(cap, pool),
+                  precision(0),
+                  scale(0),
                   values(pool, cap),
                   readScales(pool, cap) {
     // PASS

http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/test/TestCompression.cc
----------------------------------------------------------------------
diff --git a/c++/test/TestCompression.cc b/c++/test/TestCompression.cc
index 0f6eb3c..d47dd7a 100644
--- a/c++/test/TestCompression.cc
+++ b/c++/test/TestCompression.cc
@@ -38,7 +38,13 @@ namespace orc {
     // Per-test-case set-up.
     static void SetUpTestCase() {
       simpleFile = "simple-file.binary";
-      remove(simpleFile);
+      if (remove(simpleFile) != 0) {
+        if (errno != ENOENT) {
+          std::cerr << "Can't remove simple-file.binary: "
+                    << strerror(errno) << "\n";
+          throw std::runtime_error("Can't remove file");
+        }
+      }
       std::ofstream file;
       file.exceptions(std::ofstream::failbit | std::ofstream::badbit);
       file.open(simpleFile,

http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/test/TestInt128.cc
----------------------------------------------------------------------
diff --git a/c++/test/TestInt128.cc b/c++/test/TestInt128.cc
index cc3b30f..2d604f6 100644
--- a/c++/test/TestInt128.cc
+++ b/c++/test/TestInt128.cc
@@ -431,7 +431,7 @@ namespace orc {
 
   TEST(Int128, testBuildFromArray) {
     Int128 result;
-    uint32_t array[4]={0x12345678, 0x9abcdef0, 0xfedcba98, 0x76543210};
+    uint32_t array[5]={0x12345678, 0x9abcdef0, 0xfedcba98, 0x76543210, 0};
 
     buildFromArray(result, array, 0);
     EXPECT_EQ(0, result.toLong());

http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/tools/src/FileContents.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileContents.cc b/tools/src/FileContents.cc
index afda84a..694fea3 100644
--- a/tools/src/FileContents.cc
+++ b/tools/src/FileContents.cc
@@ -25,24 +25,9 @@
 #include <iostream>
 #include <string>
 
-int main(int argc, char* argv[]) {
-  if (argc < 2) {
-    std::cout << "Usage: file-contents <filename>\n";
-    return 1;
-  }
-  orc::ReaderOptions opts;
-  std::list<int64_t> cols;
-  cols.push_back(0);
-  opts.include(cols);
-
+void printContents(const char* filename, const orc::ReaderOptions opts) {
   std::unique_ptr<orc::Reader> reader;
-  try{
-    reader = orc::createReader(orc::readLocalFile(std::string(argv[1])), opts);
-  } catch (orc::ParseError e) {
-    std::cout << "Error reading file " << argv[1] << "! "
-              << e.what() << std::endl;
-    return -1;
-  }
+  reader = orc::createReader(orc::readLocalFile(std::string(filename)), opts);
 
   std::unique_ptr<orc::ColumnVectorBatch> batch = reader->createRowBatch(1000);
   std::string line;
@@ -59,5 +44,19 @@ int main(int argc, char* argv[]) {
       fwrite(str, 1, strlen(str), stdout);
     }
   }
+}
+
+int main(int argc, char* argv[]) {
+  if (argc < 2) {
+    std::cout << "Usage: file-contents <filename>\n";
+    return 1;
+  }
+  try {
+    orc::ReaderOptions opts;
+    printContents(argv[1], opts);
+  } catch (std::exception& ex) {
+    std::cerr << "Caught exception: " << ex.what() << "\n";
+    return 1;
+  }
   return 0;
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/tools/src/FileMetadata.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileMetadata.cc b/tools/src/FileMetadata.cc
index d193ad8..13db666 100644
--- a/tools/src/FileMetadata.cc
+++ b/tools/src/FileMetadata.cc
@@ -28,21 +28,27 @@
 
 using namespace orc::proto;
 
-uint64_t getTotalPaddingSize(Footer footer);
+uint64_t getTotalPaddingSize(const Footer& footer) {
+  uint64_t paddedBytes = 0;
+  StripeInformation stripe;
+  for (int stripeIx=1; stripeIx<footer.stripes_size(); stripeIx++) {
+      stripe = footer.stripes(stripeIx-1);
+      uint64_t prevStripeOffset = stripe.offset();
+      uint64_t prevStripeLen = stripe.datalength() + stripe.indexlength() +
+        stripe.footerlength();
+      paddedBytes += footer.stripes(stripeIx).offset() -
+        (prevStripeOffset + prevStripeLen);
+  };
+  return paddedBytes;
+}
 
-int main(int argc, char* argv[])
-{
+void printMetadata(const char*filename) {
+  std::streamsize origPrecision(std::cout.precision());
+  std::ios::fmtflags origFlags(std::cout.flags());
+  std::cout << "Structure for " << filename << std::endl;
   std::ifstream input;
 
-  GOOGLE_PROTOBUF_VERIFY_VERSION;
-
-  if (argc < 2) {
-    std::cout << "Usage: file-metadata <filename>\n";
-  }
-
-  std::cout << "Structure for " << argv[1] << std::endl;
-
-  input.open(argv[1], std::ios::in | std::ios::binary);
+  input.open(filename, std::ios::in | std::ios::binary);
   input.seekg(0,input.end);
   std::streamoff fileSize = input.tellg();
 
@@ -50,8 +56,7 @@ int main(int argc, char* argv[])
   input.seekg(fileSize-1);
   int result = input.get();
   if (result == EOF) {
-    std::cerr << "Failed to read postscript size\n";
-    return -1;
+    throw std::runtime_error("Failed to read postscript size");
   }
   std::streamoff postscriptSize = result;
 
@@ -73,10 +78,9 @@ int main(int argc, char* argv[])
   case SNAPPY:
   case LZO:
   default:
-      std::cout << "ORC files with compression are not supported" << std::endl ;
-      input.close();
-      return -1;
-  };
+    input.close();
+    throw std::logic_error("ORC files with compression are not supported");
+  }
 
   std::streamoff footerSize =
     static_cast<std::streamoff>(postscript.footerlength());
@@ -104,8 +108,7 @@ int main(int argc, char* argv[])
   StripeInformation stripe ;
   Stream section;
   ColumnEncoding encoding;
-  for (int stripeIx=0; stripeIx<footer.stripes_size(); stripeIx++)
-  {
+  for (int stripeIx=0; stripeIx<footer.stripes_size(); stripeIx++) {
       std::cout << "Stripe " << stripeIx+1 <<": " << std::endl ;
       stripe = footer.stripes(stripeIx);
       stripe.PrintDebugString();
@@ -154,28 +157,27 @@ int main(int argc, char* argv[])
   std::cout <<"Padding length: " << paddedBytes << " bytes" << std::endl;
   std::cout <<"Padding ratio: " << std::fixed << std::setprecision(2)
             << percentPadding << " %" << std::endl;
-
+  std::cout.precision(origPrecision);
+  std::cout.flags(origFlags);
   input.close();
+}
 
+int main(int argc, char* argv[]) {
+  GOOGLE_PROTOBUF_VERIFY_VERSION;
 
+  if (argc < 2) {
+    std::cout << "Usage: file-metadata <filename>\n";
+  }
+  try {
+    printMetadata(argv[1]);
+  } catch (std::exception& ex) {
+    std::cerr << "Caught exception: " << ex.what() << "\n";
+    return 1;
+  }
 
   google::protobuf::ShutdownProtobufLibrary();
 
   return 0;
 }
 
-uint64_t getTotalPaddingSize(Footer footer) {
-  uint64_t paddedBytes = 0;
-  StripeInformation stripe;
-  for (int stripeIx=1; stripeIx<footer.stripes_size(); stripeIx++) {
-      stripe = footer.stripes(stripeIx-1);
-      uint64_t prevStripeOffset = stripe.offset();
-      uint64_t prevStripeLen = stripe.datalength() + stripe.indexlength() +
-        stripe.footerlength();
-      paddedBytes += footer.stripes(stripeIx).offset() -
-        (prevStripeOffset + prevStripeLen);
-  };
-  return paddedBytes;
-}
-
 

http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/tools/src/FileScan.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileScan.cc b/tools/src/FileScan.cc
index 85b7617..cbb5980 100644
--- a/tools/src/FileScan.cc
+++ b/tools/src/FileScan.cc
@@ -38,10 +38,9 @@ int main(int argc, char* argv[]) {
   std::unique_ptr<orc::Reader> reader;
   try{
     reader = orc::createReader(orc::readLocalFile(std::string(argv[1])), opts);
-  } catch (orc::ParseError e) {
-    std::cout << "Error reading file " << argv[1] << "! "
-              << e.what() << std::endl;
-    return -1;
+  } catch (std::exception& ex) {
+    std::cerr << "Caught exception: " << ex.what() << "\n";
+    return 1;
   }
 
   std::unique_ptr<orc::ColumnVectorBatch> batch = reader->createRowBatch(1000);

http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/tools/src/FileStatistics.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileStatistics.cc b/tools/src/FileStatistics.cc
index 5ae677b..98edcf8 100644
--- a/tools/src/FileStatistics.cc
+++ b/tools/src/FileStatistics.cc
@@ -24,28 +24,15 @@
 #include <iostream>
 #include <string>
 
-int main(int argc, char* argv[]) {
-  if (argc < 2) {
-    std::cout << "Usage: file-metadata <filename>\n";
-  }
+void printStatistics(const char *filename) {
 
   orc::ReaderOptions opts;
-  std::list<int64_t> cols;
-  cols.push_back(0);
-  opts.include(cols);
-
   std::unique_ptr<orc::Reader> reader;
-  try{
-    reader = orc::createReader(orc::readLocalFile(std::string(argv[1])), opts);
-  } catch (orc::ParseError e) {
-    std::cout << "Error reading file " << argv[1] << "! "
-              << e.what() << std::endl;
-    return -1;
-  }
+  reader = orc::createReader(orc::readLocalFile(std::string(filename)), opts);
 
   // print out all selected columns statistics.
   std::unique_ptr<orc::Statistics> colStats = reader->getStatistics();
-  std::cout << "File " << argv[1] << " has "
+  std::cout << "File " << filename << " has "
             << colStats->getNumberOfColumns() << " columns"  << std::endl;
   for(uint32_t i=0; i < colStats->getNumberOfColumns(); ++i) {
     std::cout << "*** Column " << i << " ***" << std::endl;
@@ -54,14 +41,15 @@ int main(int argc, char* argv[]) {
 
   // test stripe statistics
   std::unique_ptr<orc::Statistics> stripeStats;
-  std::cout << "File " << argv[1] << " has " << reader->getNumberOfStripes()
+  std::cout << "File " << filename << " has " << reader->getNumberOfStripes()
             << " stripes"  << std::endl;
-  if(reader->getNumberOfStripeStatistics() == 0){
-    std::cout << "File " << argv[1] << " doesn't have stripe statistics"  << std::endl;
-  }else{
+  if (reader->getNumberOfStripeStatistics() == 0) {
+    std::cout << "File " << filename << " doesn't have stripe statistics"
+              << std::endl;
+  } else {
     for (unsigned int j = 0; j < reader->getNumberOfStripeStatistics(); j++) {
       stripeStats = reader->getStripeStatistics(j);
-      std::cout << "*** Stripe " << j << " ***" << std::endl << std::endl ;
+      std::cout << "*** Stripe " << j << " ***" << std::endl << std::endl;
 
       for(unsigned int k = 0; k < stripeStats->getNumberOfColumns(); ++k) {
         std::cout << "--- Column " << k << " ---" << std::endl;
@@ -70,6 +58,19 @@ int main(int argc, char* argv[]) {
       }
     }
   }
+}
+
+int main(int argc, char* argv[]) {
+  if (argc < 2) {
+    std::cout << "Usage: file-metadata <filename>\n";
+  }
+
+  try {
+    printStatistics(argv[1]);
+  } catch (std::exception& ex) {
+    std::cerr << "Caught exception: " << ex.what() << "\n";
+    return 1;
+  }
 
   return 0;
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/tools/test/TestReader.cc
----------------------------------------------------------------------
diff --git a/tools/test/TestReader.cc b/tools/test/TestReader.cc
index 4d53a62..ebc326f 100644
--- a/tools/test/TestReader.cc
+++ b/tools/test/TestReader.cc
@@ -132,18 +132,6 @@ namespace orc {
     EXPECT_EQ(GetParam().typeString, reader->getType().toString());
   }
 
-  std::string getOutput(FILE* outputFile) {
-    size_t posn = static_cast<size_t>(ftell(outputFile));
-    rewind(outputFile);
-    char *buffer = new char[posn];
-    size_t sizeRead = fread(buffer, 1, posn, outputFile);
-    if (sizeRead != posn) {
-      throw std::runtime_error("Bad read");
-    }
-    rewind(outputFile);
-    return std::string(buffer, posn);
-  }
-
   TEST_P(MatchTest, Contents) {
     orc::ReaderOptions opts;
     std::unique_ptr<Reader> reader =
@@ -579,9 +567,12 @@ INSTANTIATE_TEST_CASE_P(TestReader1900, MatchTest,
 
     unsigned long rowCount = 0;
     std::unique_ptr<ColumnVectorBatch> batch = reader->createRowBatch(1024);
-    LongVectorBatch* longVector =
-      dynamic_cast<LongVectorBatch*>
-      (dynamic_cast<StructVectorBatch&>(*batch).fields[0]);
+    StructVectorBatch* structBatch =
+      dynamic_cast<StructVectorBatch*>(batch.get());
+    ASSERT_TRUE(structBatch != nullptr);
+    LongVectorBatch* longVector = dynamic_cast<LongVectorBatch*>
+      (structBatch->fields[0]);
+    ASSERT_TRUE(longVector != nullptr);
     int64_t* idCol = longVector->data.data();
     while (reader->next(*batch)) {
       EXPECT_EQ(rowCount, reader->getRowNumber());
@@ -643,16 +634,24 @@ INSTANTIATE_TEST_CASE_P(TestReader1900, MatchTest,
       EXPECT_EQ(5000, fullBatch->numElements);
     }
 
-    std::unique_ptr<ColumnVectorBatch> offsetBatch =
-      offsetReader->createRowBatch(5000);
+    StructVectorBatch *fullStructBatch =
+      dynamic_cast<StructVectorBatch*>(fullBatch.get());
+    ASSERT_TRUE(fullStructBatch != nullptr);
     LongVectorBatch* fullLongVector =
-      dynamic_cast<LongVectorBatch*>
-      (dynamic_cast<StructVectorBatch&>(*fullBatch).fields[0]);
+      dynamic_cast<LongVectorBatch*>(fullStructBatch->fields[0]);
+    ASSERT_TRUE(fullLongVector != nullptr);
     int64_t* fullId = fullLongVector->data.data();
+
+    std::unique_ptr<ColumnVectorBatch> offsetBatch =
+      offsetReader->createRowBatch(5000);
+    StructVectorBatch* offsetStructBatch =
+      dynamic_cast<StructVectorBatch*>(offsetBatch.get());
+    ASSERT_TRUE(offsetStructBatch != nullptr);
     LongVectorBatch* offsetLongVector =
-      dynamic_cast<LongVectorBatch*>
-      (dynamic_cast<StructVectorBatch&>(*offsetBatch).fields[0]);
+      dynamic_cast<LongVectorBatch*>(offsetStructBatch->fields[0]);
+    ASSERT_TRUE(offsetLongVector != nullptr);
     int64_t* offsetId = offsetLongVector->data.data();
+
     for (int i=7; i < 17; ++i) {
       EXPECT_TRUE(fullReader->next(*fullBatch));
       EXPECT_TRUE(offsetReader->next(*offsetBatch));
@@ -671,10 +670,14 @@ INSTANTIATE_TEST_CASE_P(TestReader1900, MatchTest,
 
     std::unique_ptr<ColumnVectorBatch> lastBatch =
       lastReader->createRowBatch(5000);
+    StructVectorBatch* lastStructBatch =
+      dynamic_cast<StructVectorBatch*>(lastBatch.get());
+    ASSERT_TRUE(lastStructBatch != nullptr);
     LongVectorBatch* lastLongVector =
-      dynamic_cast<LongVectorBatch*>
-      (dynamic_cast<StructVectorBatch&>(*lastBatch).fields[0]);
+      dynamic_cast<LongVectorBatch*>(lastStructBatch->fields[0]);
+    ASSERT_TRUE(lastLongVector != nullptr);
     int64_t* lastId = lastLongVector->data.data();
+
     EXPECT_TRUE(fullReader->next(*fullBatch));
     EXPECT_TRUE(lastReader->next(*lastBatch));
     EXPECT_EQ(fullBatch->numElements, lastBatch->numElements);
@@ -702,19 +705,22 @@ TEST(Reader, columnStatistics) {
   // 6th real column, start from 1
   std::unique_ptr<orc::ColumnStatistics> col_6 =
     reader->getColumnStatistics(6);
-  const orc::StringColumnStatistics& strStats =
-    dynamic_cast<const orc::StringColumnStatistics&> (*(col_6.get()));
-  EXPECT_EQ("Good", strStats.getMinimum());
-  EXPECT_EQ("Unknown", strStats.getMaximum());
+  const orc::StringColumnStatistics* strStats =
+    dynamic_cast<const orc::StringColumnStatistics*> (col_6.get());
+  ASSERT_TRUE(strStats != nullptr);
+
+  EXPECT_EQ("Good", strStats->getMinimum());
+  EXPECT_EQ("Unknown", strStats->getMaximum());
 
   // 7th real column
   std::unique_ptr<orc::ColumnStatistics> col_7 =
     reader->getColumnStatistics(7);
-  const orc::IntegerColumnStatistics& intStats =
-    dynamic_cast<const orc::IntegerColumnStatistics&> (*(col_7.get()));
-  EXPECT_EQ(0, intStats.getMinimum());
-  EXPECT_EQ(6, intStats.getMaximum());
-  EXPECT_EQ(5762400, intStats.getSum());
+  const orc::IntegerColumnStatistics* intStats =
+    dynamic_cast<const orc::IntegerColumnStatistics*> (col_7.get());
+  ASSERT_TRUE(intStats != nullptr);
+  EXPECT_EQ(0, intStats->getMinimum());
+  EXPECT_EQ(6, intStats->getMaximum());
+  EXPECT_EQ(5762400, intStats->getSum());
 }
 
 TEST(Reader, stripeStatistics) {
@@ -737,6 +743,7 @@ TEST(Reader, stripeStatistics) {
   const orc::StringColumnStatistics* col_6 =
     dynamic_cast<const orc::StringColumnStatistics*>
     (stripeStats->getColumnStatistics(6));
+  ASSERT_TRUE(col_6 != nullptr);
   EXPECT_EQ("Unknown", col_6->getMinimum());
   EXPECT_EQ("Unknown", col_6->getMaximum());
 
@@ -744,6 +751,7 @@ TEST(Reader, stripeStatistics) {
   const orc::IntegerColumnStatistics* col_7 =
     dynamic_cast<const orc::IntegerColumnStatistics*>
     (stripeStats->getColumnStatistics(7));
+  ASSERT_TRUE(col_7 != nullptr);
   EXPECT_EQ(6, col_7->getMinimum());
   EXPECT_EQ(6, col_7->getMaximum());
   EXPECT_EQ(4800, col_7->getSum());
@@ -776,6 +784,7 @@ TEST(Reader, corruptStatistics) {
   const orc::DecimalColumnStatistics* col_4 =
     dynamic_cast<const orc::DecimalColumnStatistics*>
     (stripeStats->getColumnStatistics(4));
+  ASSERT_TRUE(col_4 != nullptr);
   EXPECT_EQ(true, !col_4->hasMinimum());
   EXPECT_EQ(true, !col_4->hasMaximum());
 }