You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2015/08/26 18:57:32 UTC
orc git commit: ORC-25. Clean up Coverity warnings.
Repository: orc
Updated Branches:
refs/heads/master 891c538a9 -> bec0d82fd
ORC-25. Clean up Coverity warnings.
closes apache/orc#7
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/bec0d82f
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/bec0d82f
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/bec0d82f
Branch: refs/heads/master
Commit: bec0d82fdbec0a5d6dfd312d34121049763d8ca7
Parents: 891c538
Author: Owen O'Malley <om...@apache.org>
Authored: Tue Aug 25 14:45:39 2015 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Wed Aug 26 09:55:31 2015 -0700
----------------------------------------------------------------------
c++/src/ColumnPrinter.cc | 44 +++++++++++++++++-------
c++/src/MemoryPool.cc | 2 +-
c++/src/RLEv1.cc | 5 ++-
c++/src/RLEv2.cc | 6 ++--
c++/src/Reader.cc | 62 ++++++++++++++++++++++++----------
c++/src/Vector.cc | 4 +++
c++/test/TestCompression.cc | 8 ++++-
c++/test/TestInt128.cc | 2 +-
tools/src/FileContents.cc | 33 +++++++++---------
tools/src/FileMetadata.cc | 72 ++++++++++++++++++++-------------------
tools/src/FileScan.cc | 7 ++--
tools/src/FileStatistics.cc | 43 +++++++++++------------
tools/test/TestReader.cc | 73 ++++++++++++++++++++++------------------
13 files changed, 215 insertions(+), 146 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/src/ColumnPrinter.cc
----------------------------------------------------------------------
diff --git a/c++/src/ColumnPrinter.cc b/c++/src/ColumnPrinter.cc
index 1367b02..aa90be6 100644
--- a/c++/src/ColumnPrinter.cc
+++ b/c++/src/ColumnPrinter.cc
@@ -279,8 +279,9 @@ namespace orc {
LongColumnPrinter::LongColumnPrinter(std::string& buffer,
const Type& type
- ): ColumnPrinter(buffer, type) {
- // pass
+ ): ColumnPrinter(buffer, type),
+ data(nullptr) {
+ // PASS
}
void LongColumnPrinter::reset(const ColumnVectorBatch& batch) {
@@ -302,6 +303,7 @@ namespace orc {
DoubleColumnPrinter::DoubleColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer, type),
+ data(nullptr),
isFloat(type.getKind() == FLOAT){
// PASS
}
@@ -325,7 +327,9 @@ namespace orc {
Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer,
- type) {
+ type),
+ data(nullptr),
+ scale(0) {
// PASS
}
@@ -375,14 +379,16 @@ namespace orc {
Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer,
- type) {
+ type),
+ data(nullptr),
+ scale(0) {
// PASS
}
void Decimal128ColumnPrinter::reset(const ColumnVectorBatch& batch) {
ColumnPrinter::reset(batch);
data = dynamic_cast<const Decimal128VectorBatch&>(batch).values.data();
- scale =dynamic_cast<const Decimal128VectorBatch&>(batch).scale;
+ scale = dynamic_cast<const Decimal128VectorBatch&>(batch).scale;
}
void Decimal128ColumnPrinter::printRow(uint64_t rowId) {
@@ -395,7 +401,9 @@ namespace orc {
StringColumnPrinter::StringColumnPrinter(std::string& buffer,
const Type& type
- ): ColumnPrinter(buffer, type) {
+ ): ColumnPrinter(buffer, type),
+ start(nullptr),
+ length(nullptr) {
// PASS
}
@@ -445,7 +453,8 @@ namespace orc {
ListColumnPrinter::ListColumnPrinter(std::string& buffer,
const Type& type
- ): ColumnPrinter(buffer, type) {
+ ): ColumnPrinter(buffer, type),
+ offsets(nullptr) {
elementPrinter = createColumnPrinter(buffer, type.getSubtype(0));
}
@@ -473,7 +482,8 @@ namespace orc {
MapColumnPrinter::MapColumnPrinter(std::string& buffer,
const Type& type
- ): ColumnPrinter(buffer, type) {
+ ): ColumnPrinter(buffer, type),
+ offsets(nullptr) {
keyPrinter = createColumnPrinter(buffer, type.getSubtype(0));
elementPrinter = createColumnPrinter(buffer, type.getSubtype(1));
}
@@ -507,7 +517,9 @@ namespace orc {
UnionColumnPrinter::UnionColumnPrinter(std::string& buffer,
const Type& type
- ): ColumnPrinter(buffer, type) {
+ ): ColumnPrinter(buffer, type),
+ tags(nullptr),
+ offsets(nullptr) {
for(unsigned int i=0; i < type.getSubtypeCount(); ++i) {
fieldPrinter.push_back(createColumnPrinter(buffer, type.getSubtype(i))
.release());
@@ -590,7 +602,8 @@ namespace orc {
DateColumnPrinter::DateColumnPrinter(std::string& buffer,
const Type& type
- ): ColumnPrinter(buffer, type) {
+ ): ColumnPrinter(buffer, type),
+ data(nullptr) {
// PASS
}
@@ -616,7 +629,8 @@ namespace orc {
BooleanColumnPrinter::BooleanColumnPrinter(std::string& buffer,
const Type& type
- ): ColumnPrinter(buffer, type) {
+ ): ColumnPrinter(buffer, type),
+ data(nullptr) {
// PASS
}
@@ -635,7 +649,9 @@ namespace orc {
BinaryColumnPrinter::BinaryColumnPrinter(std::string& buffer,
const Type& type
- ): ColumnPrinter(buffer, type) {
+ ): ColumnPrinter(buffer, type),
+ start(nullptr),
+ length(nullptr) {
// PASS
}
@@ -666,7 +682,9 @@ namespace orc {
TimestampColumnPrinter::TimestampColumnPrinter(std::string& buffer,
const Type& type
): ColumnPrinter(buffer,
- type) {
+ type),
+ seconds(nullptr),
+ nanoseconds(nullptr) {
// PASS
}
http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/src/MemoryPool.cc
----------------------------------------------------------------------
diff --git a/c++/src/MemoryPool.cc b/c++/src/MemoryPool.cc
index 28cc9e1..7fecf67 100644
--- a/c++/src/MemoryPool.cc
+++ b/c++/src/MemoryPool.cc
@@ -88,7 +88,7 @@ namespace orc {
template <class T>
void DataBuffer<T>::reserve(uint64_t newCapacity){
- if (newCapacity > currentCapacity) {
+ if (newCapacity > currentCapacity || !buf) {
if (buf) {
T* buf_old = buf;
buf = reinterpret_cast<T*>(memoryPool.malloc(sizeof(T) * newCapacity));
http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/src/RLEv1.cc
----------------------------------------------------------------------
diff --git a/c++/src/RLEv1.cc b/c++/src/RLEv1.cc
index 91bb79d..0a9a65e 100644
--- a/c++/src/RLEv1.cc
+++ b/c++/src/RLEv1.cc
@@ -86,8 +86,11 @@ RleDecoderV1::RleDecoderV1(std::unique_ptr<SeekableInputStream> input,
: inputStream(std::move(input)),
isSigned(hasSigned),
remainingValues(0),
+ value(0),
bufferStart(nullptr),
- bufferEnd(bufferStart) {
+ bufferEnd(bufferStart),
+ delta(0),
+ repeating(false) {
}
void RleDecoderV1::seek(PositionProvider& location) {
http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/src/RLEv2.cc
----------------------------------------------------------------------
diff --git a/c++/src/RLEv2.cc b/c++/src/RLEv2.cc
index 43428b4..02f325f 100644
--- a/c++/src/RLEv2.cc
+++ b/c++/src/RLEv2.cc
@@ -34,8 +34,7 @@ struct FixedBitSizes {
};
inline uint32_t decodeBitWidth(uint32_t n) {
- if (n >= FixedBitSizes::ONE &&
- n <= FixedBitSizes::TWENTYFOUR) {
+ if (n <= FixedBitSizes::TWENTYFOUR) {
return n + 1;
} else if (n == FixedBitSizes::TWENTYSIX) {
return 26;
@@ -125,8 +124,11 @@ RleDecoderV2::RleDecoderV2(std::unique_ptr<SeekableInputStream> input,
bitsLeft(0),
curByte(0),
patchBitSize(0),
+ unpackedIdx(0),
+ patchIdx(0),
base(0),
curGap(0),
+ curPatch(0),
patchMask(0),
actualGap(0),
unpacked(pool, 0),
http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/src/Reader.cc
----------------------------------------------------------------------
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index b7cf00d..cb0647d 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -1525,12 +1525,16 @@ namespace orc {
result = new TimestampVectorBatch(capacity, memoryPool);
break;
case STRUCT:
- result = new StructVectorBatch(capacity, memoryPool);
- for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
- subtype = &(type.getSubtype(i));
- if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
- dynamic_cast<StructVectorBatch*>(result)->fields.push_back
- (createRowBatch(*subtype, capacity).release());
+ {
+ StructVectorBatch *structResult =
+ new StructVectorBatch(capacity, memoryPool);
+ result = structResult;
+ for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
+ subtype = &(type.getSubtype(i));
+ if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
+ structResult->fields.push_back(createRowBatch(*subtype,
+ capacity).release());
+ }
}
}
break;
@@ -1563,12 +1567,16 @@ namespace orc {
}
break;
case UNION:
- result = new UnionVectorBatch(capacity, memoryPool);
- for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
- subtype = &(type.getSubtype(i));
- if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
- dynamic_cast<UnionVectorBatch*>(result)->children.push_back
- (createRowBatch(*subtype, capacity).release());
+ {
+ UnionVectorBatch *unionResult =
+ new UnionVectorBatch(capacity, memoryPool);
+ result = unionResult;
+ for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
+ subtype = &(type.getSubtype(i));
+ if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
+ unionResult->children.push_back(createRowBatch(*subtype,
+ capacity).release());
+ }
}
}
break;
@@ -1804,6 +1812,8 @@ namespace orc {
valueCount = pb.numberofvalues();
if (!pb.has_binarystatistics() || !correctStats) {
_hasTotalLength = false;
+
+ totalLength = 0;
}else{
_hasTotalLength = pb.binarystatistics().has_sum();
totalLength = static_cast<uint64_t>(pb.binarystatistics().sum());
@@ -1815,6 +1825,7 @@ namespace orc {
valueCount = pb.numberofvalues();
if (!pb.has_bucketstatistics() || !correctStats) {
_hasCount = false;
+ trueCount = 0;
}else{
_hasCount = true;
trueCount = pb.bucketstatistics().count(0);
@@ -1827,11 +1838,14 @@ namespace orc {
if (!pb.has_datestatistics() || !correctStats) {
_hasMinimum = false;
_hasMaximum = false;
- }else{
- _hasMinimum = pb.datestatistics().has_minimum();
- _hasMaximum = pb.datestatistics().has_maximum();
- minimum = pb.datestatistics().minimum();
- maximum = pb.datestatistics().maximum();
+
+ minimum = 0;
+ maximum = 0;
+ } else {
+ _hasMinimum = pb.datestatistics().has_minimum();
+ _hasMaximum = pb.datestatistics().has_maximum();
+ minimum = pb.datestatistics().minimum();
+ maximum = pb.datestatistics().maximum();
}
}
@@ -1861,6 +1875,10 @@ namespace orc {
_hasMinimum = false;
_hasMaximum = false;
_hasSum = false;
+
+ minimum = 0;
+ maximum = 0;
+ sum = 0;
}else{
const proto::DoubleStatistics& stats = pb.doublestatistics();
_hasMinimum = stats.has_minimum();
@@ -1880,6 +1898,10 @@ namespace orc {
_hasMinimum = false;
_hasMaximum = false;
_hasSum = false;
+
+ minimum = 0;
+ maximum = 0;
+ sum = 0;
}else{
const proto::IntegerStatistics& stats = pb.intstatistics();
_hasMinimum = stats.has_minimum();
@@ -1899,6 +1921,8 @@ namespace orc {
_hasMinimum = false;
_hasMaximum = false;
_hasTotalLength = false;
+
+ totalLength = 0;
}else{
const proto::StringStatistics& stats = pb.stringstatistics();
_hasMinimum = stats.has_minimum();
@@ -1912,11 +1936,13 @@ namespace orc {
}
TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl
- (const proto::ColumnStatistics& pb, bool correctStats){
+ (const proto::ColumnStatistics& pb, bool correctStats) {
valueCount = pb.numberofvalues();
if (!pb.has_timestampstatistics() || !correctStats) {
_hasMinimum = false;
_hasMaximum = false;
+ minimum = 0;
+ maximum = 0;
}else{
const proto::TimestampStatistics& stats = pb.timestampstatistics();
_hasMinimum = stats.has_minimum();
http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/src/Vector.cc
----------------------------------------------------------------------
diff --git a/c++/src/Vector.cc b/c++/src/Vector.cc
index 55167f4..6c698db 100644
--- a/c++/src/Vector.cc
+++ b/c++/src/Vector.cc
@@ -232,6 +232,8 @@ namespace orc {
Decimal64VectorBatch::Decimal64VectorBatch(uint64_t cap, MemoryPool& pool
): ColumnVectorBatch(cap, pool),
+ precision(0),
+ scale(0),
values(pool, cap),
readScales(pool, cap) {
// PASS
@@ -258,6 +260,8 @@ namespace orc {
Decimal128VectorBatch::Decimal128VectorBatch(uint64_t cap, MemoryPool& pool
): ColumnVectorBatch(cap, pool),
+ precision(0),
+ scale(0),
values(pool, cap),
readScales(pool, cap) {
// PASS
http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/test/TestCompression.cc
----------------------------------------------------------------------
diff --git a/c++/test/TestCompression.cc b/c++/test/TestCompression.cc
index 0f6eb3c..d47dd7a 100644
--- a/c++/test/TestCompression.cc
+++ b/c++/test/TestCompression.cc
@@ -38,7 +38,13 @@ namespace orc {
// Per-test-case set-up.
static void SetUpTestCase() {
simpleFile = "simple-file.binary";
- remove(simpleFile);
+ if (remove(simpleFile) != 0) {
+ if (errno != ENOENT) {
+ std::cerr << "Can't remove simple-file.binary: "
+ << strerror(errno) << "\n";
+ throw std::runtime_error("Can't remove file");
+ }
+ }
std::ofstream file;
file.exceptions(std::ofstream::failbit | std::ofstream::badbit);
file.open(simpleFile,
http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/c++/test/TestInt128.cc
----------------------------------------------------------------------
diff --git a/c++/test/TestInt128.cc b/c++/test/TestInt128.cc
index cc3b30f..2d604f6 100644
--- a/c++/test/TestInt128.cc
+++ b/c++/test/TestInt128.cc
@@ -431,7 +431,7 @@ namespace orc {
TEST(Int128, testBuildFromArray) {
Int128 result;
- uint32_t array[4]={0x12345678, 0x9abcdef0, 0xfedcba98, 0x76543210};
+ uint32_t array[5]={0x12345678, 0x9abcdef0, 0xfedcba98, 0x76543210, 0};
buildFromArray(result, array, 0);
EXPECT_EQ(0, result.toLong());
http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/tools/src/FileContents.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileContents.cc b/tools/src/FileContents.cc
index afda84a..694fea3 100644
--- a/tools/src/FileContents.cc
+++ b/tools/src/FileContents.cc
@@ -25,24 +25,9 @@
#include <iostream>
#include <string>
-int main(int argc, char* argv[]) {
- if (argc < 2) {
- std::cout << "Usage: file-contents <filename>\n";
- return 1;
- }
- orc::ReaderOptions opts;
- std::list<int64_t> cols;
- cols.push_back(0);
- opts.include(cols);
-
+void printContents(const char* filename, const orc::ReaderOptions opts) {
std::unique_ptr<orc::Reader> reader;
- try{
- reader = orc::createReader(orc::readLocalFile(std::string(argv[1])), opts);
- } catch (orc::ParseError e) {
- std::cout << "Error reading file " << argv[1] << "! "
- << e.what() << std::endl;
- return -1;
- }
+ reader = orc::createReader(orc::readLocalFile(std::string(filename)), opts);
std::unique_ptr<orc::ColumnVectorBatch> batch = reader->createRowBatch(1000);
std::string line;
@@ -59,5 +44,19 @@ int main(int argc, char* argv[]) {
fwrite(str, 1, strlen(str), stdout);
}
}
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 2) {
+ std::cout << "Usage: file-contents <filename>\n";
+ return 1;
+ }
+ try {
+ orc::ReaderOptions opts;
+ printContents(argv[1], opts);
+ } catch (std::exception& ex) {
+ std::cerr << "Caught exception: " << ex.what() << "\n";
+ return 1;
+ }
return 0;
}
http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/tools/src/FileMetadata.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileMetadata.cc b/tools/src/FileMetadata.cc
index d193ad8..13db666 100644
--- a/tools/src/FileMetadata.cc
+++ b/tools/src/FileMetadata.cc
@@ -28,21 +28,27 @@
using namespace orc::proto;
-uint64_t getTotalPaddingSize(Footer footer);
+uint64_t getTotalPaddingSize(const Footer& footer) {
+ uint64_t paddedBytes = 0;
+ StripeInformation stripe;
+ for (int stripeIx=1; stripeIx<footer.stripes_size(); stripeIx++) {
+ stripe = footer.stripes(stripeIx-1);
+ uint64_t prevStripeOffset = stripe.offset();
+ uint64_t prevStripeLen = stripe.datalength() + stripe.indexlength() +
+ stripe.footerlength();
+ paddedBytes += footer.stripes(stripeIx).offset() -
+ (prevStripeOffset + prevStripeLen);
+ };
+ return paddedBytes;
+}
-int main(int argc, char* argv[])
-{
+void printMetadata(const char*filename) {
+ std::streamsize origPrecision(std::cout.precision());
+ std::ios::fmtflags origFlags(std::cout.flags());
+ std::cout << "Structure for " << filename << std::endl;
std::ifstream input;
- GOOGLE_PROTOBUF_VERIFY_VERSION;
-
- if (argc < 2) {
- std::cout << "Usage: file-metadata <filename>\n";
- }
-
- std::cout << "Structure for " << argv[1] << std::endl;
-
- input.open(argv[1], std::ios::in | std::ios::binary);
+ input.open(filename, std::ios::in | std::ios::binary);
input.seekg(0,input.end);
std::streamoff fileSize = input.tellg();
@@ -50,8 +56,7 @@ int main(int argc, char* argv[])
input.seekg(fileSize-1);
int result = input.get();
if (result == EOF) {
- std::cerr << "Failed to read postscript size\n";
- return -1;
+ throw std::runtime_error("Failed to read postscript size");
}
std::streamoff postscriptSize = result;
@@ -73,10 +78,9 @@ int main(int argc, char* argv[])
case SNAPPY:
case LZO:
default:
- std::cout << "ORC files with compression are not supported" << std::endl ;
- input.close();
- return -1;
- };
+ input.close();
+ throw std::logic_error("ORC files with compression are not supported");
+ }
std::streamoff footerSize =
static_cast<std::streamoff>(postscript.footerlength());
@@ -104,8 +108,7 @@ int main(int argc, char* argv[])
StripeInformation stripe ;
Stream section;
ColumnEncoding encoding;
- for (int stripeIx=0; stripeIx<footer.stripes_size(); stripeIx++)
- {
+ for (int stripeIx=0; stripeIx<footer.stripes_size(); stripeIx++) {
std::cout << "Stripe " << stripeIx+1 <<": " << std::endl ;
stripe = footer.stripes(stripeIx);
stripe.PrintDebugString();
@@ -154,28 +157,27 @@ int main(int argc, char* argv[])
std::cout <<"Padding length: " << paddedBytes << " bytes" << std::endl;
std::cout <<"Padding ratio: " << std::fixed << std::setprecision(2)
<< percentPadding << " %" << std::endl;
-
+ std::cout.precision(origPrecision);
+ std::cout.flags(origFlags);
input.close();
+}
+int main(int argc, char* argv[]) {
+ GOOGLE_PROTOBUF_VERIFY_VERSION;
+ if (argc < 2) {
+ std::cout << "Usage: file-metadata <filename>\n";
+ }
+ try {
+ printMetadata(argv[1]);
+ } catch (std::exception& ex) {
+ std::cerr << "Caught exception: " << ex.what() << "\n";
+ return 1;
+ }
google::protobuf::ShutdownProtobufLibrary();
return 0;
}
-uint64_t getTotalPaddingSize(Footer footer) {
- uint64_t paddedBytes = 0;
- StripeInformation stripe;
- for (int stripeIx=1; stripeIx<footer.stripes_size(); stripeIx++) {
- stripe = footer.stripes(stripeIx-1);
- uint64_t prevStripeOffset = stripe.offset();
- uint64_t prevStripeLen = stripe.datalength() + stripe.indexlength() +
- stripe.footerlength();
- paddedBytes += footer.stripes(stripeIx).offset() -
- (prevStripeOffset + prevStripeLen);
- };
- return paddedBytes;
-}
-
http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/tools/src/FileScan.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileScan.cc b/tools/src/FileScan.cc
index 85b7617..cbb5980 100644
--- a/tools/src/FileScan.cc
+++ b/tools/src/FileScan.cc
@@ -38,10 +38,9 @@ int main(int argc, char* argv[]) {
std::unique_ptr<orc::Reader> reader;
try{
reader = orc::createReader(orc::readLocalFile(std::string(argv[1])), opts);
- } catch (orc::ParseError e) {
- std::cout << "Error reading file " << argv[1] << "! "
- << e.what() << std::endl;
- return -1;
+ } catch (std::exception& ex) {
+ std::cerr << "Caught exception: " << ex.what() << "\n";
+ return 1;
}
std::unique_ptr<orc::ColumnVectorBatch> batch = reader->createRowBatch(1000);
http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/tools/src/FileStatistics.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileStatistics.cc b/tools/src/FileStatistics.cc
index 5ae677b..98edcf8 100644
--- a/tools/src/FileStatistics.cc
+++ b/tools/src/FileStatistics.cc
@@ -24,28 +24,15 @@
#include <iostream>
#include <string>
-int main(int argc, char* argv[]) {
- if (argc < 2) {
- std::cout << "Usage: file-metadata <filename>\n";
- }
+void printStatistics(const char *filename) {
orc::ReaderOptions opts;
- std::list<int64_t> cols;
- cols.push_back(0);
- opts.include(cols);
-
std::unique_ptr<orc::Reader> reader;
- try{
- reader = orc::createReader(orc::readLocalFile(std::string(argv[1])), opts);
- } catch (orc::ParseError e) {
- std::cout << "Error reading file " << argv[1] << "! "
- << e.what() << std::endl;
- return -1;
- }
+ reader = orc::createReader(orc::readLocalFile(std::string(filename)), opts);
// print out all selected columns statistics.
std::unique_ptr<orc::Statistics> colStats = reader->getStatistics();
- std::cout << "File " << argv[1] << " has "
+ std::cout << "File " << filename << " has "
<< colStats->getNumberOfColumns() << " columns" << std::endl;
for(uint32_t i=0; i < colStats->getNumberOfColumns(); ++i) {
std::cout << "*** Column " << i << " ***" << std::endl;
@@ -54,14 +41,15 @@ int main(int argc, char* argv[]) {
// test stripe statistics
std::unique_ptr<orc::Statistics> stripeStats;
- std::cout << "File " << argv[1] << " has " << reader->getNumberOfStripes()
+ std::cout << "File " << filename << " has " << reader->getNumberOfStripes()
<< " stripes" << std::endl;
- if(reader->getNumberOfStripeStatistics() == 0){
- std::cout << "File " << argv[1] << " doesn't have stripe statistics" << std::endl;
- }else{
+ if (reader->getNumberOfStripeStatistics() == 0) {
+ std::cout << "File " << filename << " doesn't have stripe statistics"
+ << std::endl;
+ } else {
for (unsigned int j = 0; j < reader->getNumberOfStripeStatistics(); j++) {
stripeStats = reader->getStripeStatistics(j);
- std::cout << "*** Stripe " << j << " ***" << std::endl << std::endl ;
+ std::cout << "*** Stripe " << j << " ***" << std::endl << std::endl;
for(unsigned int k = 0; k < stripeStats->getNumberOfColumns(); ++k) {
std::cout << "--- Column " << k << " ---" << std::endl;
@@ -70,6 +58,19 @@ int main(int argc, char* argv[]) {
}
}
}
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 2) {
+ std::cout << "Usage: file-metadata <filename>\n";
+ }
+
+ try {
+ printStatistics(argv[1]);
+ } catch (std::exception& ex) {
+ std::cerr << "Caught exception: " << ex.what() << "\n";
+ return 1;
+ }
return 0;
}
http://git-wip-us.apache.org/repos/asf/orc/blob/bec0d82f/tools/test/TestReader.cc
----------------------------------------------------------------------
diff --git a/tools/test/TestReader.cc b/tools/test/TestReader.cc
index 4d53a62..ebc326f 100644
--- a/tools/test/TestReader.cc
+++ b/tools/test/TestReader.cc
@@ -132,18 +132,6 @@ namespace orc {
EXPECT_EQ(GetParam().typeString, reader->getType().toString());
}
- std::string getOutput(FILE* outputFile) {
- size_t posn = static_cast<size_t>(ftell(outputFile));
- rewind(outputFile);
- char *buffer = new char[posn];
- size_t sizeRead = fread(buffer, 1, posn, outputFile);
- if (sizeRead != posn) {
- throw std::runtime_error("Bad read");
- }
- rewind(outputFile);
- return std::string(buffer, posn);
- }
-
TEST_P(MatchTest, Contents) {
orc::ReaderOptions opts;
std::unique_ptr<Reader> reader =
@@ -579,9 +567,12 @@ INSTANTIATE_TEST_CASE_P(TestReader1900, MatchTest,
unsigned long rowCount = 0;
std::unique_ptr<ColumnVectorBatch> batch = reader->createRowBatch(1024);
- LongVectorBatch* longVector =
- dynamic_cast<LongVectorBatch*>
- (dynamic_cast<StructVectorBatch&>(*batch).fields[0]);
+ StructVectorBatch* structBatch =
+ dynamic_cast<StructVectorBatch*>(batch.get());
+ ASSERT_TRUE(structBatch != nullptr);
+ LongVectorBatch* longVector = dynamic_cast<LongVectorBatch*>
+ (structBatch->fields[0]);
+ ASSERT_TRUE(longVector != nullptr);
int64_t* idCol = longVector->data.data();
while (reader->next(*batch)) {
EXPECT_EQ(rowCount, reader->getRowNumber());
@@ -643,16 +634,24 @@ INSTANTIATE_TEST_CASE_P(TestReader1900, MatchTest,
EXPECT_EQ(5000, fullBatch->numElements);
}
- std::unique_ptr<ColumnVectorBatch> offsetBatch =
- offsetReader->createRowBatch(5000);
+ StructVectorBatch *fullStructBatch =
+ dynamic_cast<StructVectorBatch*>(fullBatch.get());
+ ASSERT_TRUE(fullStructBatch != nullptr);
LongVectorBatch* fullLongVector =
- dynamic_cast<LongVectorBatch*>
- (dynamic_cast<StructVectorBatch&>(*fullBatch).fields[0]);
+ dynamic_cast<LongVectorBatch*>(fullStructBatch->fields[0]);
+ ASSERT_TRUE(fullLongVector != nullptr);
int64_t* fullId = fullLongVector->data.data();
+
+ std::unique_ptr<ColumnVectorBatch> offsetBatch =
+ offsetReader->createRowBatch(5000);
+ StructVectorBatch* offsetStructBatch =
+ dynamic_cast<StructVectorBatch*>(offsetBatch.get());
+ ASSERT_TRUE(offsetStructBatch != nullptr);
LongVectorBatch* offsetLongVector =
- dynamic_cast<LongVectorBatch*>
- (dynamic_cast<StructVectorBatch&>(*offsetBatch).fields[0]);
+ dynamic_cast<LongVectorBatch*>(offsetStructBatch->fields[0]);
+ ASSERT_TRUE(offsetLongVector != nullptr);
int64_t* offsetId = offsetLongVector->data.data();
+
for (int i=7; i < 17; ++i) {
EXPECT_TRUE(fullReader->next(*fullBatch));
EXPECT_TRUE(offsetReader->next(*offsetBatch));
@@ -671,10 +670,14 @@ INSTANTIATE_TEST_CASE_P(TestReader1900, MatchTest,
std::unique_ptr<ColumnVectorBatch> lastBatch =
lastReader->createRowBatch(5000);
+ StructVectorBatch* lastStructBatch =
+ dynamic_cast<StructVectorBatch*>(lastBatch.get());
+ ASSERT_TRUE(lastStructBatch != nullptr);
LongVectorBatch* lastLongVector =
- dynamic_cast<LongVectorBatch*>
- (dynamic_cast<StructVectorBatch&>(*lastBatch).fields[0]);
+ dynamic_cast<LongVectorBatch*>(lastStructBatch->fields[0]);
+ ASSERT_TRUE(lastLongVector != nullptr);
int64_t* lastId = lastLongVector->data.data();
+
EXPECT_TRUE(fullReader->next(*fullBatch));
EXPECT_TRUE(lastReader->next(*lastBatch));
EXPECT_EQ(fullBatch->numElements, lastBatch->numElements);
@@ -702,19 +705,22 @@ TEST(Reader, columnStatistics) {
// 6th real column, start from 1
std::unique_ptr<orc::ColumnStatistics> col_6 =
reader->getColumnStatistics(6);
- const orc::StringColumnStatistics& strStats =
- dynamic_cast<const orc::StringColumnStatistics&> (*(col_6.get()));
- EXPECT_EQ("Good", strStats.getMinimum());
- EXPECT_EQ("Unknown", strStats.getMaximum());
+ const orc::StringColumnStatistics* strStats =
+ dynamic_cast<const orc::StringColumnStatistics*> (col_6.get());
+ ASSERT_TRUE(strStats != nullptr);
+
+ EXPECT_EQ("Good", strStats->getMinimum());
+ EXPECT_EQ("Unknown", strStats->getMaximum());
// 7th real column
std::unique_ptr<orc::ColumnStatistics> col_7 =
reader->getColumnStatistics(7);
- const orc::IntegerColumnStatistics& intStats =
- dynamic_cast<const orc::IntegerColumnStatistics&> (*(col_7.get()));
- EXPECT_EQ(0, intStats.getMinimum());
- EXPECT_EQ(6, intStats.getMaximum());
- EXPECT_EQ(5762400, intStats.getSum());
+ const orc::IntegerColumnStatistics* intStats =
+ dynamic_cast<const orc::IntegerColumnStatistics*> (col_7.get());
+ ASSERT_TRUE(intStats != nullptr);
+ EXPECT_EQ(0, intStats->getMinimum());
+ EXPECT_EQ(6, intStats->getMaximum());
+ EXPECT_EQ(5762400, intStats->getSum());
}
TEST(Reader, stripeStatistics) {
@@ -737,6 +743,7 @@ TEST(Reader, stripeStatistics) {
const orc::StringColumnStatistics* col_6 =
dynamic_cast<const orc::StringColumnStatistics*>
(stripeStats->getColumnStatistics(6));
+ ASSERT_TRUE(col_6 != nullptr);
EXPECT_EQ("Unknown", col_6->getMinimum());
EXPECT_EQ("Unknown", col_6->getMaximum());
@@ -744,6 +751,7 @@ TEST(Reader, stripeStatistics) {
const orc::IntegerColumnStatistics* col_7 =
dynamic_cast<const orc::IntegerColumnStatistics*>
(stripeStats->getColumnStatistics(7));
+ ASSERT_TRUE(col_7 != nullptr);
EXPECT_EQ(6, col_7->getMinimum());
EXPECT_EQ(6, col_7->getMaximum());
EXPECT_EQ(4800, col_7->getSum());
@@ -776,6 +784,7 @@ TEST(Reader, corruptStatistics) {
const orc::DecimalColumnStatistics* col_4 =
dynamic_cast<const orc::DecimalColumnStatistics*>
(stripeStats->getColumnStatistics(4));
+ ASSERT_TRUE(col_4 != nullptr);
EXPECT_EQ(true, !col_4->hasMinimum());
EXPECT_EQ(true, !col_4->hasMaximum());
}