You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by md...@apache.org on 2018/03/08 02:22:24 UTC
orc git commit: ORC-311: [C++] Fix null pointers when StripeFooter corrupts
Repository: orc
Updated Branches:
refs/heads/master 65c5eeff1 -> 59c359dfc
ORC-311: [C++] Fix null pointers when StripeFooter corrupts
Fixes #223
Signed-off-by: Deepak Majeti <md...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/59c359df
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/59c359df
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/59c359df
Branch: refs/heads/master
Commit: 59c359dfceeafe26898dfc635b5fac679e51d277
Parents: 65c5eef
Author: stiga-huang <hu...@gmail.com>
Authored: Fri Mar 2 06:53:44 2018 -0800
Committer: Deepak Majeti <md...@apache.org>
Committed: Wed Mar 7 21:21:51 2018 -0500
----------------------------------------------------------------------
c++/src/ColumnReader.cc | 136 ++++++++++++++++++++++++-------------------
1 file changed, 77 insertions(+), 59 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/59c359df/c++/src/ColumnReader.cc
----------------------------------------------------------------------
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index 24f0820..ee2d80d 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -146,9 +146,11 @@ namespace orc {
BooleanColumnReader::BooleanColumnReader(const Type& type,
StripeStreams& stripe
): ColumnReader(type, stripe){
- rle = createBooleanRleDecoder(stripe.getStream(columnId,
- proto::Stream_Kind_DATA,
- true));
+ std::unique_ptr<SeekableInputStream> stream =
+ stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
+ if (stream == nullptr)
+ throw ParseError("DATA stream not found in Boolean column");
+ rle = createBooleanRleDecoder(std::move(stream));
}
BooleanColumnReader::~BooleanColumnReader() {
@@ -191,9 +193,11 @@ namespace orc {
ByteColumnReader::ByteColumnReader(const Type& type,
StripeStreams& stripe
): ColumnReader(type, stripe){
- rle = createByteRleDecoder(stripe.getStream(columnId,
- proto::Stream_Kind_DATA,
- true));
+ std::unique_ptr<SeekableInputStream> stream =
+ stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
+ if (stream == nullptr)
+ throw ParseError("DATA stream not found in Byte column");
+ rle = createByteRleDecoder(std::move(stream));
}
ByteColumnReader::~ByteColumnReader() {
@@ -237,10 +241,11 @@ namespace orc {
StripeStreams& stripe
): ColumnReader(type, stripe) {
RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind());
- rle = createRleDecoder(stripe.getStream(columnId,
- proto::Stream_Kind_DATA,
- true),
- true, vers, memoryPool);
+ std::unique_ptr<SeekableInputStream> stream =
+ stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
+ if (stream == nullptr)
+ throw ParseError("DATA stream not found in Integer column");
+ rle = createRleDecoder(std::move(stream), true, vers, memoryPool);
}
IntegerColumnReader::~IntegerColumnReader() {
@@ -286,14 +291,15 @@ namespace orc {
writerTimezone(stripe.getWriterTimezone()),
epochOffset(writerTimezone.getEpoch()) {
RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind());
- secondsRle = createRleDecoder(stripe.getStream(columnId,
- proto::Stream_Kind_DATA,
- true),
- true, vers, memoryPool);
- nanoRle = createRleDecoder(stripe.getStream(columnId,
- proto::Stream_Kind_SECONDARY,
- true),
- false, vers, memoryPool);
+ std::unique_ptr<SeekableInputStream> stream =
+ stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
+ if (stream == nullptr)
+ throw ParseError("DATA stream not found in Timestamp column");
+ secondsRle = createRleDecoder(std::move(stream), true, vers, memoryPool);
+ stream = stripe.getStream(columnId, proto::Stream_Kind_SECONDARY, true);
+ if (stream == nullptr)
+ throw ParseError("SECONDARY stream not found in Timestamp column");
+ nanoRle = createRleDecoder(std::move(stream), false, vers, memoryPool);
}
TimestampColumnReader::~TimestampColumnReader() {
@@ -391,17 +397,14 @@ namespace orc {
DoubleColumnReader::DoubleColumnReader(const Type& type,
StripeStreams& stripe
): ColumnReader(type, stripe),
- inputStream
- (stripe.getStream
- (columnId,
- proto::Stream_Kind_DATA,
- true)),
columnKind(type.getKind()),
bytesPerValue((type.getKind() ==
FLOAT) ? 4 : 8),
bufferPointer(nullptr),
bufferEnd(nullptr) {
- // PASS
+ inputStream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
+ if (inputStream == nullptr)
+ throw ParseError("DATA stream not found in Double column");
}
DoubleColumnReader::~DoubleColumnReader() {
@@ -413,11 +416,10 @@ namespace orc {
if (static_cast<size_t>(bufferEnd - bufferPointer) >=
bytesPerValue * numValues) {
- bufferPointer+= bytesPerValue*numValues;
+ bufferPointer += bytesPerValue * numValues;
} else {
- inputStream->Skip(static_cast<int>(bytesPerValue*numValues -
- static_cast<size_t>(bufferEnd -
- bufferPointer)));
+ inputStream->Skip(static_cast<int>(bytesPerValue * numValues -
+ static_cast<size_t>(bufferEnd - bufferPointer)));
bufferEnd = nullptr;
bufferPointer = nullptr;
}
@@ -503,28 +505,34 @@ namespace orc {
RleVersion rleVersion = convertRleVersion(stripe.getEncoding(columnId)
.kind());
dictionaryCount = stripe.getEncoding(columnId).dictionarysize();
- rle = createRleDecoder(stripe.getStream(columnId,
- proto::Stream_Kind_DATA,
- true),
- false, rleVersion, memoryPool);
+ std::unique_ptr<SeekableInputStream> stream =
+ stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
+ if (stream == nullptr)
+ throw ParseError("DATA stream not found in StringDictionaryColumn");
+ rle = createRleDecoder(std::move(stream), false, rleVersion, memoryPool);
+ stream = stripe.getStream(columnId, proto::Stream_Kind_LENGTH, false);
+ if (dictionaryCount > 0 && stream == nullptr) {
+ throw ParseError("LENGTH stream not found in StringDictionaryColumn");
+ }
std::unique_ptr<RleDecoder> lengthDecoder =
- createRleDecoder(stripe.getStream(columnId,
- proto::Stream_Kind_LENGTH,
- false),
- false, rleVersion, memoryPool);
+ createRleDecoder(std::move(stream), false, rleVersion, memoryPool);
dictionaryOffset.resize(dictionaryCount+1);
int64_t* lengthArray = dictionaryOffset.data();
lengthDecoder->next(lengthArray + 1, dictionaryCount, nullptr);
lengthArray[0] = 0;
- for(uint64_t i=1; i < dictionaryCount + 1; ++i) {
+ for (uint64_t i = 1; i < dictionaryCount + 1; ++i) {
if (lengthArray[i] < 0)
throw ParseError("Negative dictionary entry length");
- lengthArray[i] += lengthArray[i-1];
+ lengthArray[i] += lengthArray[i - 1];
}
int64_t blobSize = lengthArray[dictionaryCount];
dictionaryBlob.resize(static_cast<uint64_t>(blobSize));
std::unique_ptr<SeekableInputStream> blobStream =
- stripe.getStream(columnId, proto::Stream_Kind_DICTIONARY_DATA, false);
+ stripe.getStream(columnId, proto::Stream_Kind_DICTIONARY_DATA, false);
+ if (blobSize > 0 && blobStream == nullptr) {
+ throw ParseError(
+ "DICTIONARY_DATA stream not found in StringDictionaryColumn");
+ }
readFully(dictionaryBlob.data(), blobSize, blobStream.get());
}
@@ -611,11 +619,15 @@ namespace orc {
blobBuffer(stripe.getMemoryPool()) {
RleVersion rleVersion = convertRleVersion(stripe.getEncoding(columnId)
.kind());
- lengthRle = createRleDecoder(stripe.getStream(columnId,
- proto::Stream_Kind_LENGTH,
- true),
- false, rleVersion, memoryPool);
+ std::unique_ptr<SeekableInputStream> stream =
+ stripe.getStream(columnId, proto::Stream_Kind_LENGTH, true);
+ if (stream == nullptr)
+ throw ParseError("LENGTH stream not found in StringDirectColumn");
+ lengthRle = createRleDecoder(
+ std::move(stream), false, rleVersion, memoryPool);
blobStream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
+ if (blobStream == nullptr)
+ throw ParseError("DATA stream not found in StringDirectColumn");
lastBuffer = nullptr;
lastBufferLength = 0;
}
@@ -847,10 +859,11 @@ namespace orc {
// count the number of selected sub-columns
const std::vector<bool> selectedColumns = stripe.getSelectedColumns();
RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind());
- rle = createRleDecoder(stripe.getStream(columnId,
- proto::Stream_Kind_LENGTH,
- true),
- false, vers, memoryPool);
+ std::unique_ptr<SeekableInputStream> stream =
+ stripe.getStream(columnId, proto::Stream_Kind_LENGTH, true);
+ if (stream == nullptr)
+ throw ParseError("LENGTH stream not found in List column");
+ rle = createRleDecoder(std::move(stream), false, vers, memoryPool);
const Type& childType = *type.getSubtype(0);
if (selectedColumns[static_cast<uint64_t>(childType.getColumnId())]) {
child = buildReader(childType, stripe);
@@ -940,10 +953,11 @@ namespace orc {
// Determine if the key and/or value columns are selected
const std::vector<bool> selectedColumns = stripe.getSelectedColumns();
RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind());
- rle = createRleDecoder(stripe.getStream(columnId,
- proto::Stream_Kind_LENGTH,
- true),
- false, vers, memoryPool);
+ std::unique_ptr<SeekableInputStream> stream =
+ stripe.getStream(columnId, proto::Stream_Kind_LENGTH, true);
+ if (stream == nullptr)
+ throw ParseError("LENGTH stream not found in Map column");
+ rle = createRleDecoder(std::move(stream), false, vers, memoryPool);
const Type& keyType = *type.getSubtype(0);
if (selectedColumns[static_cast<uint64_t>(keyType.getColumnId())]) {
keyReader = buildReader(keyType, stripe);
@@ -1049,9 +1063,11 @@ namespace orc {
childrenReader.resize(numChildren);
childrenCounts.resize(numChildren);
- rle = createByteRleDecoder(stripe.getStream(columnId,
- proto::Stream_Kind_DATA,
- true));
+ std::unique_ptr<SeekableInputStream> stream =
+ stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
+ if (stream == nullptr)
+ throw ParseError("LENGTH stream not found in Union column");
+ rle = createByteRleDecoder(std::move(stream));
// figure out which types are selected
const std::vector<bool> selectedColumns = stripe.getSelectedColumns();
for(unsigned int i=0; i < numChildren; ++i) {
@@ -1232,14 +1248,16 @@ namespace orc {
scale = static_cast<int32_t>(type.getScale());
precision = static_cast<int32_t>(type.getPrecision());
valueStream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
+ if (valueStream == nullptr)
+ throw ParseError("DATA stream not found in Decimal64Column");
buffer = nullptr;
bufferEnd = nullptr;
RleVersion vers = convertRleVersion(stripe.getEncoding(columnId).kind());
- scaleDecoder = createRleDecoder(stripe.getStream
- (columnId,
- proto::Stream_Kind_SECONDARY,
- true),
- true, vers, memoryPool);
+ std::unique_ptr<SeekableInputStream> stream =
+ stripe.getStream(columnId, proto::Stream_Kind_SECONDARY, true);
+ if (stream == nullptr)
+ throw ParseError("SECONDARY stream not found in Decimal64Column");
+ scaleDecoder = createRleDecoder(std::move(stream), true, vers, memoryPool);
}
Decimal64ColumnReader::~Decimal64ColumnReader() {