You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2016/01/06 21:56:31 UTC
[1/2] orc git commit: Fixed ORC-29: Enable ColumnPrinter to print
only specified columns. (asandryh and omalley)
Repository: orc
Updated Branches:
refs/heads/master b39302f59 -> 3945f0663
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/test/TestColumnReader.cc
----------------------------------------------------------------------
diff --git a/c++/test/TestColumnReader.cc b/c++/test/TestColumnReader.cc
index 4b1b4b1..075a069 100644
--- a/c++/test/TestColumnReader.cc
+++ b/c++/test/TestColumnReader.cc
@@ -37,14 +37,14 @@ namespace orc {
class MockStripeStreams: public StripeStreams {
public:
~MockStripeStreams();
- std::unique_ptr<SeekableInputStream> getStream(int64_t columnId,
+ std::unique_ptr<SeekableInputStream> getStream(uint64_t columnId,
proto::Stream_Kind kind,
bool stream) const override;
MOCK_CONST_METHOD0(getReaderOptions, const ReaderOptions&());
MOCK_CONST_METHOD0(getSelectedColumns, const std::vector<bool>());
- MOCK_CONST_METHOD1(getEncoding, proto::ColumnEncoding (int64_t));
+ MOCK_CONST_METHOD1(getEncoding, proto::ColumnEncoding (uint64_t));
MOCK_CONST_METHOD3(getStreamProxy, SeekableInputStream*
- (int64_t, proto::Stream_Kind, bool));
+ (uint64_t, proto::Stream_Kind, bool));
MemoryPool& getMemoryPool() const {
return *getDefaultPool();
}
@@ -60,7 +60,7 @@ MockStripeStreams::~MockStripeStreams() {
}
std::unique_ptr<SeekableInputStream>
-MockStripeStreams::getStream(int64_t columnId,
+MockStripeStreams::getStream(uint64_t columnId,
proto::Stream_Kind kind,
bool shouldStream) const {
return std::unique_ptr < SeekableInputStream >
@@ -98,8 +98,7 @@ TEST(TestColumnReader, testBooleanWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(BOOLEAN), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createPrimitiveType(BOOLEAN));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
LongVectorBatch *longBatch = new LongVectorBatch(1024, *getDefaultPool());
@@ -152,8 +151,7 @@ TEST(TestColumnReader, testBooleanSkipsWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(BOOLEAN), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createPrimitiveType(BOOLEAN));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
LongVectorBatch *longBatch = new LongVectorBatch(1024, *getDefaultPool());
@@ -217,8 +215,7 @@ TEST(TestColumnReader, testByteWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(BYTE), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createPrimitiveType(BYTE));
std::unique_ptr<ColumnReader> reader =
buildReader(*rowType, streams);
@@ -281,8 +278,7 @@ TEST(TestColumnReader, testByteSkipsWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(BYTE), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createPrimitiveType(BYTE));
std::unique_ptr<ColumnReader> reader =
buildReader(*rowType, streams);
@@ -313,7 +309,7 @@ TEST(TestColumnReader, testIntegerWithNulls) {
// set getSelectedColumns()
std::vector<bool> selectedColumns(2, true);
-
+
EXPECT_CALL(streams, getSelectedColumns())
.WillRepeatedly(testing::Return(selectedColumns));
@@ -337,8 +333,7 @@ TEST(TestColumnReader, testIntegerWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(INT), "myInt" );
- rowType->assignIds(0);
+ rowType->addStructField("myInt", createPrimitiveType(INT));
std::unique_ptr<ColumnReader> reader =
buildReader(*rowType, streams);
@@ -403,8 +398,7 @@ TEST(TestColumnReader, testDictionaryWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(STRING), "myString");
- rowType->assignIds(0);
+ rowType->addStructField("myString", createPrimitiveType(STRING));
std::unique_ptr<ColumnReader> reader =
buildReader(*rowType, streams);
@@ -503,10 +497,9 @@ TEST(TestColumnReader, testVarcharDictionaryWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(VARCHAR), "col0");
- rowType->addStructField(createPrimitiveType(CHAR), "col1");
- rowType->addStructField(createPrimitiveType(STRING), "col2");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createPrimitiveType(VARCHAR))
+ ->addStructField("col1", createPrimitiveType(CHAR))
+ ->addStructField("col2", createPrimitiveType(STRING));
std::unique_ptr<ColumnReader> reader =
buildReader(*rowType, streams);
@@ -576,11 +569,14 @@ TEST(TestColumnReader, testSubstructsWithNulls) {
(buffer4, ARRAY_SIZE(buffer4))));
// create the row type
+ std::unique_ptr<Type> innerType = createStructType();
+ innerType->addStructField("col2", createPrimitiveType(LONG));
+
+ std::unique_ptr<Type> middleType = createStructType();
+ middleType->addStructField("col1", std::move(innerType));
+
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createStructType(), "col0")
- .addStructField(createStructType(), "col1")
- .addStructField(createPrimitiveType(LONG), "col2");
- rowType->assignIds(0);
+ rowType->addStructField("col0", std::move(middleType));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -684,9 +680,8 @@ TEST(TestColumnReader, testSkipWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(INT), "myInt");
- rowType->addStructField(createPrimitiveType(STRING), "myString");
- rowType->assignIds(0);
+ rowType->addStructField("myInt", createPrimitiveType(INT));
+ rowType->addStructField("myString", createPrimitiveType(STRING));
std::unique_ptr<ColumnReader> reader =
buildReader(*rowType, streams);
@@ -766,8 +761,7 @@ TEST(TestColumnReader, testBinaryDirect) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(BINARY), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createPrimitiveType(BINARY));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -829,8 +823,7 @@ TEST(TestColumnReader, testBinaryDirectWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(BINARY), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createPrimitiveType(BINARY));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -881,7 +874,7 @@ TEST(TestColumnReader, testShortBlobError) {
EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_DATA, true))
.WillRepeatedly(testing::Return(new SeekableArrayInputStream
(blob, ARRAY_SIZE(blob))));
-
+
const unsigned char buffer1[] = {0x61, 0x00, 0x02};
EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_LENGTH, true))
.WillRepeatedly(testing::Return(new SeekableArrayInputStream
@@ -889,8 +882,7 @@ TEST(TestColumnReader, testShortBlobError) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(STRING), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createPrimitiveType(STRING));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -939,8 +931,7 @@ TEST(TestColumnReader, testStringDirectShortBuffer) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(STRING), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createPrimitiveType(STRING));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -1002,8 +993,7 @@ TEST(TestColumnReader, testStringDirectShortBufferWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(STRING), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createPrimitiveType(STRING));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -1073,8 +1063,7 @@ TEST(TestColumnReader, testStringDirectNullAcrossWindow) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(STRING), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createPrimitiveType(STRING));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -1132,7 +1121,7 @@ TEST(TestColumnReader, testStringDirectSkip) {
(blob, BLOB_SIZE, 200)));
// the stream of 0 to 1199
- const unsigned char buffer1[] =
+ const unsigned char buffer1[] =
{ 0x7f, 0x01, 0x00,
0x7f, 0x01, 0x82, 0x01,
0x7f, 0x01, 0x84, 0x02,
@@ -1149,8 +1138,7 @@ TEST(TestColumnReader, testStringDirectSkip) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(STRING), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createPrimitiveType(STRING));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -1250,8 +1238,7 @@ TEST(TestColumnReader, testStringDirectSkipWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(STRING), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createPrimitiveType(STRING));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -1338,8 +1325,7 @@ TEST(TestColumnReader, testList) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createListType(createPrimitiveType(LONG)), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createListType(createPrimitiveType(LONG)));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -1371,10 +1357,11 @@ TEST(TestColumnReader, testListPropagateNulls) {
EXPECT_CALL(streams, getSelectedColumns())
.WillRepeatedly(testing::Return(selectedColumns));
+ std::unique_ptr<Type> innerType = createStructType();
+ innerType->addStructField("col0_0",
+ createListType(createPrimitiveType(LONG)));
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createStructType(), "col0")
- .addStructField(createListType(createPrimitiveType(LONG)), "col0_0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", std::move(innerType));
// set getEncoding
proto::ColumnEncoding directEncoding;
@@ -1495,8 +1482,7 @@ TEST(TestColumnReader, testListWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createListType(createPrimitiveType(LONG)), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createListType(createPrimitiveType(LONG)));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -1658,8 +1644,7 @@ TEST(TestColumnReader, testListSkipWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createListType(createPrimitiveType(LONG)), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createListType(createPrimitiveType(LONG)));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -1759,8 +1744,7 @@ TEST(TestColumnReader, testListSkipWithNullsNoData) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createListType(createPrimitiveType(LONG)), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createListType(createPrimitiveType(LONG)));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -1857,10 +1841,8 @@ TEST(TestColumnReader, testMap) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createMapType(createPrimitiveType(LONG),
- createPrimitiveType(LONG)),
- "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createMapType(createPrimitiveType(LONG),
+ createPrimitiveType(LONG)));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -1976,10 +1958,8 @@ TEST(TestColumnReader, testMapWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createMapType(createPrimitiveType(LONG),
- createPrimitiveType(LONG)),
- "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createMapType(createPrimitiveType(LONG),
+ createPrimitiveType(LONG)));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -2186,10 +2166,8 @@ TEST(TestColumnReader, testMapSkipWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createMapType(createPrimitiveType(LONG),
- createPrimitiveType(LONG)),
- "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createMapType(createPrimitiveType(LONG),
+ createPrimitiveType(LONG)));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -2296,10 +2274,8 @@ TEST(TestColumnReader, testMapSkipWithNullsNoData) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createMapType(createPrimitiveType(LONG),
- createPrimitiveType(LONG)),
- "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createMapType(createPrimitiveType(LONG),
+ createPrimitiveType(LONG)));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -2384,8 +2360,7 @@ TEST(TestColumnReader, testFloatWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(FLOAT), "myFloat");
- rowType->assignIds(0);
+ rowType->addStructField("myFloat", createPrimitiveType(FLOAT));
std::unique_ptr<ColumnReader> reader =
buildReader(*rowType, streams);
@@ -2448,8 +2423,7 @@ TEST(TestColumnReader, testFloatSkipWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(FLOAT), "myFloat");
- rowType->assignIds(0);
+ rowType->addStructField("myFloat", createPrimitiveType(FLOAT));
std::unique_ptr<ColumnReader> reader =
buildReader(*rowType, streams);
@@ -2550,8 +2524,7 @@ TEST(TestColumnReader, testDoubleWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(DOUBLE), "myDouble");
- rowType->assignIds(0);
+ rowType->addStructField("myDouble", createPrimitiveType(DOUBLE));
std::unique_ptr<ColumnReader> reader =
buildReader(*rowType, streams);
@@ -2615,8 +2588,7 @@ TEST(TestColumnReader, testDoubleSkipWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(DOUBLE), "myDouble");
- rowType->assignIds(0);
+ rowType->addStructField("myDouble", createPrimitiveType(DOUBLE));
std::unique_ptr<ColumnReader> reader =
buildReader(*rowType, streams);
@@ -2687,7 +2659,7 @@ TEST(TestColumnReader, testTimestampSkipWithNulls) {
.WillRepeatedly(testing::Return(new SeekableArrayInputStream
(buffer1, ARRAY_SIZE(buffer1))));
- const unsigned char buffer2[] = { 0xfc, 0xbb, 0xb5, 0xbe, 0x31, 0xa1, 0xee,
+ const unsigned char buffer2[] = { 0xfc, 0xbb, 0xb5, 0xbe, 0x31, 0xa1, 0xee,
0xe2, 0x10, 0xf8, 0x92, 0xee, 0xf, 0x92,
0xa0, 0xd4, 0x30 };
EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_DATA, true))
@@ -2701,8 +2673,7 @@ TEST(TestColumnReader, testTimestampSkipWithNulls) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(TIMESTAMP), "myTimestamp");
- rowType->assignIds(0);
+ rowType->addStructField("myTimestamp", createPrimitiveType(TIMESTAMP));
std::unique_ptr<ColumnReader> reader =
buildReader(*rowType, streams);
@@ -2812,8 +2783,7 @@ TEST(TestColumnReader, testTimestamp) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createPrimitiveType(TIMESTAMP), "myTimestamp");
- rowType->assignIds(0);
+ rowType->addStructField("myTimestamp", createPrimitiveType(TIMESTAMP));
std::unique_ptr<ColumnReader> reader =
buildReader(*rowType, streams);
@@ -2902,8 +2872,7 @@ TEST(DecimalColumnReader, testDecimal64) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createDecimalType(12, 2), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createDecimalType(12, 2));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -2983,8 +2952,7 @@ TEST(DecimalColumnReader, testDecimal64Skip) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createDecimalType(12, 10), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createDecimalType(12, 10));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -3059,8 +3027,7 @@ TEST(DecimalColumnReader, testDecimal128) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createDecimalType(32, 2), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createDecimalType(32, 2));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -3153,8 +3120,7 @@ TEST(DecimalColumnReader, testDecimal128Skip) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createDecimalType(38, 37), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createDecimalType(38, 37));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -3241,8 +3207,7 @@ TEST(DecimalColumnReader, testDecimalHive11) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createDecimalType(0, 0), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createDecimalType(0, 0));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -3341,8 +3306,7 @@ TEST(DecimalColumnReader, testDecimalHive11Skip) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createDecimalType(0, 0), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createDecimalType(0, 0));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -3427,8 +3391,7 @@ TEST(DecimalColumnReader, testDecimalHive11ScaleUp) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createDecimalType(0, 0), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createDecimalType(0, 0));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -3516,8 +3479,7 @@ TEST(DecimalColumnReader, testDecimalHive11ScaleDown) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createDecimalType(0, 0), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createDecimalType(0, 0));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -3587,8 +3549,7 @@ TEST(DecimalColumnReader, testDecimalHive11OverflowException) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createDecimalType(0, 0), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createDecimalType(0, 0));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -3646,8 +3607,7 @@ TEST(DecimalColumnReader, testDecimalHive11OverflowExceptionNull) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createDecimalType(0, 0), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createDecimalType(0, 0));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -3712,8 +3672,7 @@ TEST(DecimalColumnReader, testDecimalHive11OverflowNull) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createDecimalType(0, 0), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createDecimalType(0, 0));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -3795,8 +3754,7 @@ TEST(DecimalColumnReader, testDecimalHive11BigBatches) {
// create the row type
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createDecimalType(0, 0), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", createDecimalType(0, 0));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -3892,12 +3850,11 @@ TEST(TestColumnReader, testUnion) {
(buffer3, ARRAY_SIZE(buffer3))));
// create the row type
- std::vector<Type*> childrenTypes;
- childrenTypes.push_back(createPrimitiveType(LONG).release());
- childrenTypes.push_back(createPrimitiveType(INT).release());
+ std::unique_ptr<Type> unionType = createUnionType();
+ unionType->addUnionChild(createPrimitiveType(LONG));
+ unionType->addUnionChild(createPrimitiveType(INT));
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createUnionType(childrenTypes), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", std::move(unionType));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -4043,12 +4000,11 @@ TEST(TestColumnReader, testUnionWithNulls) {
(buffer4, ARRAY_SIZE(buffer4))));
// create the row type
- std::vector<Type*> childrenTypes;
- childrenTypes.push_back(createPrimitiveType(LONG).release());
- childrenTypes.push_back(createPrimitiveType(INT).release());
+ std::unique_ptr<Type> unionType = createUnionType();
+ unionType->addUnionChild(createPrimitiveType(LONG));
+ unionType->addUnionChild(createPrimitiveType(INT));
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createUnionType(childrenTypes), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", std::move(unionType));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -4138,12 +4094,11 @@ TEST(TestColumnReader, testUnionSkips) {
(buffer3, ARRAY_SIZE(buffer3))));
// create the row type
- std::vector<Type*> childrenTypes;
- childrenTypes.push_back(createPrimitiveType(LONG).release());
- childrenTypes.push_back(createPrimitiveType(INT).release());
+ std::unique_ptr<Type> unionType = createUnionType();
+ unionType->addUnionChild(createPrimitiveType(LONG));
+ unionType->addUnionChild(createPrimitiveType(INT));
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createUnionType(childrenTypes), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", std::move(unionType));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -4242,12 +4197,11 @@ TEST(TestColumnReader, testUnionLongSkip) {
(buffer2, ARRAY_SIZE(buffer2))));
// create the row type
- std::vector<Type*> childrenTypes;
- childrenTypes.push_back(createPrimitiveType(LONG).release());
- childrenTypes.push_back(createPrimitiveType(INT).release());
+ std::unique_ptr<Type> unionType = createUnionType();
+ unionType->addUnionChild(createPrimitiveType(LONG));
+ unionType->addUnionChild(createPrimitiveType(INT));
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createUnionType(childrenTypes), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", std::move(unionType));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -4326,7 +4280,7 @@ TEST(TestColumnReader, testUnionWithManyVariants) {
// for variant in range(0, 130):
// [variant & 0x3f, (variant & 0x3f) + 1, (variant & 0x3f) + 2]
unsigned char buffer[3 * 130];
- for(int variant = 0; variant < 130; ++variant) {
+ for(uint variant = 0; variant < 130; ++variant) {
buffer[3 * variant] = 0x00;
buffer[3 * variant + 1] = 0x01;
buffer[3 * variant + 2] = static_cast<unsigned char>((variant * 2) & 0x7f);
@@ -4337,13 +4291,12 @@ TEST(TestColumnReader, testUnionWithManyVariants) {
}
// create the row type
- std::vector<Type*> childrenTypes;
+ std::unique_ptr<Type> unionType = createUnionType();
for(size_t variant=0; variant < 130; ++variant) {
- childrenTypes.push_back(createPrimitiveType(LONG).release());
+ unionType->addUnionChild(createPrimitiveType(LONG));
}
std::unique_ptr<Type> rowType = createStructType();
- rowType->addStructField(createUnionType(childrenTypes), "col0");
- rowType->assignIds(0);
+ rowType->addStructField("col0", std::move(unionType));
std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
@@ -4366,7 +4319,7 @@ TEST(TestColumnReader, testUnionWithManyVariants) {
for (size_t i = 0; i < batch.numElements; ++i) {
EXPECT_EQ(i, unions->tags[i]);
EXPECT_EQ(0, unions->offsets[i]);
- EXPECT_EQ(i & 0x3f,
+ EXPECT_EQ(i & 0x3f,
dynamic_cast<LongVectorBatch*>(unions->children[unions->tags[i]])
->data[unions->offsets[i]]);
}
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/test/TestType.cc
----------------------------------------------------------------------
diff --git a/c++/test/TestType.cc b/c++/test/TestType.cc
new file mode 100644
index 0000000..3c595d0
--- /dev/null
+++ b/c++/test/TestType.cc
@@ -0,0 +1,277 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Adaptor.hh"
+#include "OrcTest.hh"
+#include "orc/Type.hh"
+#include "wrap/gtest-wrapper.h"
+
+#include "TypeImpl.hh"
+
+namespace orc {
+
+ uint64_t checkIds(const Type* type, uint64_t next) { // recursive test helper: expects type's column id to equal next
+ EXPECT_EQ(next, type->getColumnId())
+ << "Wrong id for " << type->toString();
+ next += 1; // first id expected for the subtypes
+ for(uint64_t child = 0; child < type->getSubtypeCount(); ++child) {
+ next = checkIds(type->getSubtype(child), next) + 1; // one past the maximum id the child reported
+ } // next - 1 is now type's own expected id if it has no subtypes, else the last subtype's maximum id
+ EXPECT_EQ(next - 1, type->getMaximumColumnId())
+ << "Wrong maximum id for " << type->toString();
+ return type->getMaximumColumnId(); // returns the value the type reports, not next - 1
+ } // checkIds
+
+ TEST(TestType, simple) { // flat struct with one list field: checks ids, kinds and toString
+ std::unique_ptr<Type> myType = createStructType(); // root struct, expected to have column id 0
+ myType->addStructField("myInt", createPrimitiveType(INT));
+ myType->addStructField("myString", createPrimitiveType(STRING));
+ myType->addStructField("myFloat", createPrimitiveType(FLOAT));
+ myType->addStructField("list", createListType(createPrimitiveType(LONG))); // expected to span ids 4 (list) and 5 (element), see below
+ myType->addStructField("bool", createPrimitiveType(BOOLEAN));
+
+ EXPECT_EQ(0, myType->getColumnId());
+ EXPECT_EQ(6, myType->getMaximumColumnId()); // five fields plus the list's element type
+ EXPECT_EQ(5, myType->getSubtypeCount());
+ EXPECT_EQ(STRUCT, myType->getKind());
+ EXPECT_EQ("struct<myInt:int,myString:string,myFloat:float,"
+ "list:array<bigint>,bool:boolean>",
+ myType->toString());
+ checkIds(myType.get(), 0); // recursive id consistency check starting at id 0
+
+ const Type* child = myType->getSubtype(0); // field "myInt"
+ EXPECT_EQ(1, child->getColumnId());
+ EXPECT_EQ(1, child->getMaximumColumnId());
+ EXPECT_EQ(INT, child->getKind());
+ EXPECT_EQ(0, child->getSubtypeCount());
+
+ child = myType->getSubtype(1); // field "myString"
+ EXPECT_EQ(2, child->getColumnId());
+ EXPECT_EQ(2, child->getMaximumColumnId());
+ EXPECT_EQ(STRING, child->getKind());
+ EXPECT_EQ(0, child->getSubtypeCount());
+
+ child = myType->getSubtype(2); // field "myFloat"
+ EXPECT_EQ(3, child->getColumnId());
+ EXPECT_EQ(3, child->getMaximumColumnId());
+ EXPECT_EQ(FLOAT, child->getKind());
+ EXPECT_EQ(0, child->getSubtypeCount());
+
+ child = myType->getSubtype(3); // field "list"
+ EXPECT_EQ(4, child->getColumnId());
+ EXPECT_EQ(5, child->getMaximumColumnId()); // maximum covers the element type's id
+ EXPECT_EQ(LIST, child->getKind());
+ EXPECT_EQ(1, child->getSubtypeCount());
+ EXPECT_EQ("array<bigint>", child->toString());
+
+ child = child->getSubtype(0); // element type of the list
+ EXPECT_EQ(5, child->getColumnId());
+ EXPECT_EQ(5, child->getMaximumColumnId());
+ EXPECT_EQ(LONG, child->getKind());
+ EXPECT_EQ(0, child->getSubtypeCount());
+
+ child = myType->getSubtype(4); // field "bool", numbered after the list's element
+ EXPECT_EQ(6, child->getColumnId());
+ EXPECT_EQ(6, child->getMaximumColumnId());
+ EXPECT_EQ(BOOLEAN, child->getKind());
+ EXPECT_EQ(0, child->getSubtypeCount());
+ }
+
+ TEST(TestType, nested) { // struct<list<map<string,union<struct<int>,string>>>>: checks ids at every nesting level
+ std::unique_ptr<Type> myType = createStructType();
+ { // scope for the temporaries that are moved into myType
+ std::unique_ptr<Type> innerStruct = createStructType();
+ innerStruct->addStructField("col0", createPrimitiveType(INT));
+
+ std::unique_ptr<Type> unionType = createUnionType();
+ unionType->addUnionChild(std::move(innerStruct));
+ unionType->addUnionChild(createPrimitiveType(STRING));
+
+ myType->addStructField("myList",
+ createListType
+ (createMapType(createPrimitiveType(STRING),
+ std::move(unionType))));
+ } // the unique_ptr named unionType ends here; the name is reused for a raw pointer below
+
+ // walk down level by level to get a pointer to the innermost (int) type
+ const Type* listType = myType->getSubtype(0); // field "myList"
+ const Type* mapType = listType->getSubtype(0); // list element
+ const Type* unionType = mapType->getSubtype(1); // map value (subtype 0 is the string key)
+ const Type* structType = unionType->getSubtype(0); // first union child
+ const Type* intType = structType->getSubtype(0); // field "col0"
+
+ // query the id of the innermost child first to make sure that we climb correctly
+ EXPECT_EQ(6, intType->getColumnId());
+ EXPECT_EQ(6, intType->getMaximumColumnId());
+ EXPECT_EQ("int", intType->toString());
+
+ checkIds(myType.get(), 0); // recursive id consistency check over the whole tree
+
+ EXPECT_EQ(5, structType->getColumnId());
+ EXPECT_EQ(6, structType->getMaximumColumnId());
+ EXPECT_EQ("struct<col0:int>", structType->toString());
+
+ EXPECT_EQ(4, unionType->getColumnId());
+ EXPECT_EQ(7, unionType->getMaximumColumnId()); // covers the struct child (5-6) and the string child
+ EXPECT_EQ("uniontype<struct<col0:int>,string>", unionType->toString());
+
+ EXPECT_EQ(2, mapType->getColumnId());
+ EXPECT_EQ(7, mapType->getMaximumColumnId());
+ EXPECT_EQ("map<string,uniontype<struct<col0:int>,string>>",
+ mapType->toString());
+
+ EXPECT_EQ(1, listType->getColumnId());
+ EXPECT_EQ(7, listType->getMaximumColumnId());
+ EXPECT_EQ("array<map<string,uniontype<struct<col0:int>,string>>>",
+ listType->toString());
+
+ EXPECT_EQ(0, myType->getColumnId());
+ EXPECT_EQ(7, myType->getMaximumColumnId()); // root maximum equals the deepest subtree's maximum
+ EXPECT_EQ("struct<myList:array<map<string,uniontype<struct<col0:int>,"
+ "string>>>>",
+ myType->toString());
+ }
+
+ TEST(TestType, selectedType) {  // checks buildSelectedType() projections of an 8-field struct
+ std::unique_ptr<Type> myType = createStructType();
+ myType->addStructField("col0", createPrimitiveType(BYTE));
+ myType->addStructField("col1", createPrimitiveType(SHORT));
+ myType->addStructField("col2",
+ createListType(createPrimitiveType(STRING)));
+ myType->addStructField("col3",
+ createMapType(createPrimitiveType(FLOAT),
+ createPrimitiveType(DOUBLE)));
+ std::unique_ptr<Type> unionType = createUnionType();
+ unionType->addUnionChild(createCharType(CHAR, 100));
+ unionType->addUnionChild(createCharType(VARCHAR, 200));
+ myType->addStructField("col4", std::move(unionType));
+ myType->addStructField("col5", createPrimitiveType(INT));
+ myType->addStructField("col6", createPrimitiveType(LONG));
+ myType->addStructField("col7", createDecimalType(10, 2));
+
+ checkIds(myType.get(), 0);
+ EXPECT_EQ("struct<col0:tinyint,col1:smallint,col2:array<string>,"
+ "col3:map<float,double>,col4:uniontype<char(100),varchar(200)>,"
+ "col5:int,col6:bigint,col7:decimal(10,2)>", myType->toString());
+ EXPECT_EQ(0, myType->getColumnId());
+ EXPECT_EQ(13, myType->getMaximumColumnId());
+
+ std::vector<bool> selected(14);  // one flag per column id 0..13, all false initially
+ selected[0] = true;  // root struct
+ selected[2] = true;  // col1 (asserted below to carry column id 2)
+ std::unique_ptr<Type> cutType = buildSelectedType(myType.get(),
+ selected);
+ EXPECT_EQ("struct<col1:smallint>", cutType->toString());
+ EXPECT_EQ(0, cutType->getColumnId());
+ EXPECT_EQ(13, cutType->getMaximumColumnId());  // the maximum id is expected to be unchanged by the projection
+ EXPECT_EQ(2, cutType->getSubtype(0)->getColumnId());
+
+ selected.assign(14, true);  // select every column: expect the full type back
+ cutType = buildSelectedType(myType.get(), selected);
+ EXPECT_EQ("struct<col0:tinyint,col1:smallint,col2:array<string>,"
+ "col3:map<float,double>,col4:uniontype<char(100),varchar(200)>,"
+ "col5:int,col6:bigint,col7:decimal(10,2)>", cutType->toString());
+ EXPECT_EQ(0, cutType->getColumnId());
+ EXPECT_EQ(13, cutType->getMaximumColumnId());
+
+ selected.assign(14, false);
+ selected[0] = true;
+ selected[8] = true;  // col4 union (id 8)
+ selected[10] = true;  // its varchar(200) child (id 10)
+ cutType = buildSelectedType(myType.get(), selected);
+ EXPECT_EQ("struct<col4:uniontype<varchar(200)>>", cutType->toString());
+ EXPECT_EQ(0, cutType->getColumnId());
+ EXPECT_EQ(13, cutType->getMaximumColumnId());
+ EXPECT_EQ(8, cutType->getSubtype(0)->getColumnId());
+ EXPECT_EQ(10, cutType->getSubtype(0)->getMaximumColumnId());
+ EXPECT_EQ(10, cutType->getSubtype(0)->getSubtype(0)->getColumnId());
+
+ selected.assign(14, false);
+ selected[0] = true;
+ selected[8] = true;  // union selected without any of its children
+ cutType = buildSelectedType(myType.get(), selected);
+ EXPECT_EQ("struct<col4:uniontype<>>", cutType->toString());
+
+ selected.assign(14, false);
+ selected[0] = true;  // only the root: expect an empty struct
+ cutType = buildSelectedType(myType.get(), selected);
+ EXPECT_EQ("struct<>", cutType->toString());
+
+ selected.assign(14, false);
+ selected[0] = true;
+ selected[3] = true;  // col2 list (id 3)
+ selected[4] = true;  // its string element (id 4)
+ cutType = buildSelectedType(myType.get(), selected);
+ EXPECT_EQ("struct<col2:array<string>>", cutType->toString());
+
+ selected.assign(14, false);
+ selected[0] = true;
+ selected[3] = true;  // list without its element: the element is expected to print as "void"
+ cutType = buildSelectedType(myType.get(), selected);
+ EXPECT_EQ("struct<col2:array<void>>", cutType->toString());
+ EXPECT_EQ(3, cutType->getSubtype(0)->getColumnId());
+ EXPECT_EQ(4, cutType->getSubtype(0)->getMaximumColumnId());
+
+ selected.assign(14, false);
+ selected[0] = true;
+ selected[5] = true;  // col3 map (ids 5-7)
+ selected[6] = true;  // float key
+ selected[7] = true;  // double value
+ cutType = buildSelectedType(myType.get(), selected);
+ EXPECT_EQ("struct<col3:map<float,double>>", cutType->toString());
+ EXPECT_EQ(5, cutType->getSubtype(0)->getColumnId());
+ EXPECT_EQ(7, cutType->getSubtype(0)->getMaximumColumnId());
+
+ selected.assign(14, false);
+ selected[0] = true;
+ selected[5] = true;  // map with neither key nor value selected
+ cutType = buildSelectedType(myType.get(), selected);
+ EXPECT_EQ("struct<col3:map<void,void>>", cutType->toString());
+ EXPECT_EQ(5, cutType->getSubtype(0)->getColumnId());
+ EXPECT_EQ(7, cutType->getSubtype(0)->getMaximumColumnId());
+
+ selected.assign(14, false);
+ selected[0] = true;
+ selected[5] = true;
+ selected[6] = true;  // key only
+ cutType = buildSelectedType(myType.get(), selected);
+ EXPECT_EQ("struct<col3:map<float,void>>", cutType->toString());
+ EXPECT_EQ(5, cutType->getSubtype(0)->getColumnId());
+ EXPECT_EQ(7, cutType->getSubtype(0)->getMaximumColumnId());
+
+ selected.assign(14, false);
+ selected[0] = true;
+ selected[5] = true;
+ selected[7] = true;  // value only
+ cutType = buildSelectedType(myType.get(), selected);
+ EXPECT_EQ("struct<col3:map<void,double>>", cutType->toString());
+ EXPECT_EQ(5, cutType->getSubtype(0)->getColumnId());
+ EXPECT_EQ(7, cutType->getSubtype(0)->getMaximumColumnId());
+
+ selected.assign(14, false);
+ selected[0] = true;
+ selected[1] = true;  // col0 (id 1)
+ selected[13] = true;  // col7 (id 13)
+ cutType = buildSelectedType(myType.get(), selected);
+ EXPECT_EQ("struct<col0:tinyint,col7:decimal(10,2)>", cutType->toString());
+ EXPECT_EQ(1, cutType->getSubtype(0)->getColumnId());
+ EXPECT_EQ(1, cutType->getSubtype(0)->getMaximumColumnId());
+ EXPECT_EQ(13, cutType->getSubtype(1)->getColumnId());
+ EXPECT_EQ(13, cutType->getSubtype(1)->getMaximumColumnId());
+ }
+}
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/tools/src/FileContents.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileContents.cc b/tools/src/FileContents.cc
index 694fea3..ff7eb72 100644
--- a/tools/src/FileContents.cc
+++ b/tools/src/FileContents.cc
@@ -32,7 +32,7 @@ void printContents(const char* filename, const orc::ReaderOptions opts) {
std::unique_ptr<orc::ColumnVectorBatch> batch = reader->createRowBatch(1000);
std::string line;
std::unique_ptr<orc::ColumnPrinter> printer =
- createColumnPrinter(line, reader->getType());
+ createColumnPrinter(line, &reader->getSelectedType());
while (reader->next(*batch)) {
printer->reset(*batch);
@@ -48,12 +48,36 @@ void printContents(const char* filename, const orc::ReaderOptions opts) {
int main(int argc, char* argv[]) {
if (argc < 2) {
- std::cout << "Usage: file-contents <filename>\n";
+ std::cout << "Usage: file-contents <filename> [--columns=1,2,...]\n"
+ << "Print contents of <filename>.\n"
+ << "If columns are specified, only these top-level (logical) columns are printed.\n" ;
return 1;
}
try {
+ const std::string COLUMNS_PREFIX = "--columns=";
+ std::list<uint64_t> cols;
+ char* filename = ORC_NULLPTR;
+
+ // Read command-line options
+ char *param, *value;
+ for (int i = 1; i < argc; i++) {
+ if ( (param = std::strstr(argv[i], COLUMNS_PREFIX.c_str())) ) {
+ value = std::strtok(param+COLUMNS_PREFIX.length(), "," );
+ while (value) {
+ cols.push_back(static_cast<uint64_t>(std::atoi(value)));
+ value = std::strtok(nullptr, "," );
+ }
+ } else {
+ filename = argv[i];
+ }
+ }
orc::ReaderOptions opts;
- printContents(argv[1], opts);
+ if (cols.size() > 0) {
+ opts.include(cols);
+ }
+ if (filename != ORC_NULLPTR) {
+ printContents(filename, opts);
+ }
} catch (std::exception& ex) {
std::cerr << "Caught exception: " << ex.what() << "\n";
return 1;
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/tools/src/FileMemory.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileMemory.cc b/tools/src/FileMemory.cc
index ba8459a..2bfd21a 100644
--- a/tools/src/FileMemory.cc
+++ b/tools/src/FileMemory.cc
@@ -70,7 +70,7 @@ int main(int argc, char* argv[]) {
char* filename = ORC_NULLPTR;
// Default parameters
- std::list<int64_t> cols;
+ std::list<uint64_t> cols;
uint32_t batchSize = 1000;
// Read command-line options
@@ -79,7 +79,7 @@ int main(int argc, char* argv[]) {
if ( (param = std::strstr(argv[i], COLUMNS_PREFIX.c_str())) ) {
value = std::strtok(param+COLUMNS_PREFIX.length(), "," );
while (value) {
- cols.push_back(std::atoi(value));
+ cols.push_back(static_cast<uint64_t>(std::atoi(value)));
value = std::strtok(nullptr, "," );
}
} else if ( (param=strstr(argv[i], BATCH_PREFIX.c_str())) ) {
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/tools/src/FileScan.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileScan.cc b/tools/src/FileScan.cc
index cbb5980..4683847 100644
--- a/tools/src/FileScan.cc
+++ b/tools/src/FileScan.cc
@@ -31,10 +31,6 @@ int main(int argc, char* argv[]) {
}
orc::ReaderOptions opts;
- std::list<int64_t> cols;
- cols.push_back(0);
- opts.include(cols);
-
std::unique_ptr<orc::Reader> reader;
try{
reader = orc::createReader(orc::readLocalFile(std::string(argv[1])), opts);
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/tools/test/TestReader.cc
----------------------------------------------------------------------
diff --git a/tools/test/TestReader.cc b/tools/test/TestReader.cc
index 0d337ca..7862eff 100644
--- a/tools/test/TestReader.cc
+++ b/tools/test/TestReader.cc
@@ -140,7 +140,7 @@ namespace orc {
std::unique_ptr<ColumnVectorBatch> batch = reader->createRowBatch(1024);
std::string line;
std::unique_ptr<orc::ColumnPrinter> printer =
- orc::createColumnPrinter(line, reader->getType());
+ orc::createColumnPrinter(line, &reader->getSelectedType());
GzipTextReader expected(getJsonFilename());
std::string expectedLine;
while (reader->next(*batch)) {
@@ -511,8 +511,8 @@ INSTANTIATE_TEST_CASE_P(TestReader1900, MatchTest,
TEST(Reader, columnSelectionTest) {
ReaderOptions opts;
- std::list<int64_t> includes;
- for(int i=1; i < 10; i += 2) {
+ std::list<uint64_t> includes;
+ for(uint64_t i=0; i < 9; i += 2) {
includes.push_back(i);
}
opts.include(includes);
@@ -545,17 +545,17 @@ INSTANTIATE_TEST_CASE_P(TestReader1900, MatchTest,
EXPECT_EQ("_col6", rootType.getFieldName(6));
EXPECT_EQ("_col7", rootType.getFieldName(7));
EXPECT_EQ("_col8", rootType.getFieldName(8));
- EXPECT_EQ(INT, rootType.getSubtype(0).getKind());
- EXPECT_EQ(STRING, rootType.getSubtype(1).getKind());
- EXPECT_EQ(STRING, rootType.getSubtype(2).getKind());
- EXPECT_EQ(STRING, rootType.getSubtype(3).getKind());
- EXPECT_EQ(INT, rootType.getSubtype(4).getKind());
- EXPECT_EQ(STRING, rootType.getSubtype(5).getKind());
- EXPECT_EQ(INT, rootType.getSubtype(6).getKind());
- EXPECT_EQ(INT, rootType.getSubtype(7).getKind());
- EXPECT_EQ(INT, rootType.getSubtype(8).getKind());
+ EXPECT_EQ(INT, rootType.getSubtype(0)->getKind());
+ EXPECT_EQ(STRING, rootType.getSubtype(1)->getKind());
+ EXPECT_EQ(STRING, rootType.getSubtype(2)->getKind());
+ EXPECT_EQ(STRING, rootType.getSubtype(3)->getKind());
+ EXPECT_EQ(INT, rootType.getSubtype(4)->getKind());
+ EXPECT_EQ(STRING, rootType.getSubtype(5)->getKind());
+ EXPECT_EQ(INT, rootType.getSubtype(6)->getKind());
+ EXPECT_EQ(INT, rootType.getSubtype(7)->getKind());
+ EXPECT_EQ(INT, rootType.getSubtype(8)->getKind());
for(unsigned int i=0; i < 9; ++i) {
- EXPECT_EQ(i + 1, rootType.getSubtype(i).getColumnId())
+ EXPECT_EQ(i + 1, rootType.getSubtype(i)->getColumnId())
<< "fail on " << i;
}
@@ -637,6 +637,11 @@ INSTANTIATE_TEST_CASE_P(TestReader1900, MatchTest,
StructVectorBatch *fullStructBatch =
dynamic_cast<StructVectorBatch*>(fullBatch.get());
ASSERT_TRUE(fullStructBatch != nullptr);
+ std::cout << "OOM fullBatch " << reinterpret_cast<uint64_t>(fullStructBatch)
+ << "\n";
+ std::cout << "OOM fields.size() "
+ << fullStructBatch->fields.size()
+ << "\n";
LongVectorBatch* fullLongVector =
dynamic_cast<LongVectorBatch*>(fullStructBatch->fields[0]);
ASSERT_TRUE(fullLongVector != nullptr);
@@ -911,11 +916,8 @@ TEST(Reader, selectColumns) {
orc::ReaderOptions opts;
std::ostringstream filename;
filename << exampleDirectory << "/TestOrcFile.testSeek.orc";
- std::list<int64_t> cols;
// All columns
- cols.push_back(0);
- opts.include(cols);
std::unique_ptr<orc::Reader> reader =
orc::createReader(orc::readLocalFile(filename.str()), opts);
std::vector<bool> c = reader->getSelectedColumns();
@@ -923,10 +925,30 @@ TEST(Reader, selectColumns) {
for (unsigned int i=0; i < c.size(); i++) {
EXPECT_TRUE(c[i]);
}
+ std::unique_ptr<orc::ColumnVectorBatch> batch = reader->createRowBatch(1);
+ std::string line;
+ std::unique_ptr<orc::ColumnPrinter> printer =
+ createColumnPrinter(line, &reader->getSelectedType());
+ reader->next(*batch);
+ printer->reset(*batch);
+ printer->printRow(0);
+ std::ostringstream expected;
+ expected << "{\"boolean1\": true, \"byte1\": -76, "
+ << "\"short1\": 21684, \"int1\": -941468492, "
+ << "\"long1\": -6863419716327549772, \"float1\": 0.7762409, "
+ << "\"double1\": 0.77624090391187, \"bytes1\": [123, 108, 207, 27, 93, "
+ << "157, 139, 233, 181, 90, 14, 60, 34, 120, 26, 119, 231, 50, 155, 121], "
+ << "\"string1\": \"887336a7\", \"middle\": {\"list\": [{\"int1\": "
+ << "-941468492, \"string1\": \"887336a7\"}, {\"int1\": -1598014431, "
+ << "\"string1\": \"ba419d35-x\"}]}, \"list\": [], \"map\": [{\"key\": "
+ << "\"ba419d35-x\", \"value\": {\"int1\": -1598014431, \"string1\": "
+ << "\"ba419d35-x\"}}, {\"key\": \"887336a7\", \"value\": {\"int1\": "
+ << "-941468492, \"string1\": \"887336a7\"}}]}";
+ EXPECT_EQ(expected.str(), line);
// Int column #2
- cols.clear();
- cols.push_back(2);
+ std::list<uint64_t> cols;
+ cols.push_back(1);
opts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
c = reader->getSelectedColumns();
@@ -936,10 +958,19 @@ TEST(Reader, selectColumns) {
else
EXPECT_TRUE(!c[i]);
}
+ batch = reader->createRowBatch(1);
+ line.clear();
+ printer = createColumnPrinter(line, &reader->getSelectedType());
+ reader->next(*batch);
+ printer->reset(*batch);
+ printer->printRow(0);
+ std::string expectedInt("{\"byte1\": -76}");
+ EXPECT_EQ(expectedInt, line);
+
// Struct column #10
cols.clear();
- cols.push_back(10);
+ cols.push_back(9);
opts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
c = reader->getSelectedColumns();
@@ -949,10 +980,21 @@ TEST(Reader, selectColumns) {
else
EXPECT_TRUE(!c[i]);
}
+ batch = reader->createRowBatch(1);
+ line.clear();
+ printer = createColumnPrinter(line, &reader->getSelectedType());
+ reader->next(*batch);
+ printer->reset(*batch);
+ printer->printRow(0);
+ std::ostringstream expectedStruct;
+ expectedStruct << "{\"middle\": {\"list\": "
+ << "[{\"int1\": -941468492, \"string1\": \"887336a7\"}, "
+ << "{\"int1\": -1598014431, \"string1\": \"ba419d35-x\"}]}}";
+ EXPECT_EQ(expectedStruct.str(), line);
// Array column #11
cols.clear();
- cols.push_back(11);
+ cols.push_back(10);
opts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
c = reader->getSelectedColumns();
@@ -962,10 +1004,18 @@ TEST(Reader, selectColumns) {
else
EXPECT_TRUE(!c[i]);
}
+ batch = reader->createRowBatch(1);
+ line.clear();
+ printer = createColumnPrinter(line, &reader->getSelectedType());
+ reader->next(*batch);
+ printer->reset(*batch);
+ printer->printRow(0);
+ std::string expectedArray("{\"list\": []}");
+ EXPECT_EQ(expectedArray, line);
// Map column #12
cols.clear();
- cols.push_back(12);
+ cols.push_back(11);
opts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
c = reader->getSelectedColumns();
@@ -975,6 +1025,18 @@ TEST(Reader, selectColumns) {
else
EXPECT_TRUE(!c[i]);
}
+ batch = reader->createRowBatch(1);
+ line.clear();
+ printer = createColumnPrinter(line, &reader->getSelectedType());
+ reader->next(*batch);
+ printer->reset(*batch);
+ printer->printRow(0);
+ std::ostringstream expectedMap;
+ expectedMap << "{\"map\": [{\"key\": \"ba419d35-x\", \"value\": {\"int1\":"
+ << " -1598014431, \"string1\": \"ba419d35-x\"}}, {\"key\": "
+ << "\"887336a7\", \"value\": {\"int1\": -941468492, \"string1\": "
+ << "\"887336a7\"}}]}";
+ EXPECT_EQ(expectedMap.str(), line);
}
TEST(Reader, memoryUse) {
@@ -983,10 +1045,10 @@ TEST(Reader, memoryUse) {
std::unique_ptr<orc::Reader> reader;
std::unique_ptr<orc::ColumnVectorBatch> batch;
orc::ReaderOptions opts;
- std::list<int64_t> cols;
+ std::list<uint64_t> cols;
// Int column
- cols.push_back(2);
+ cols.push_back(1);
opts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
EXPECT_EQ(483517, reader->getMemoryUse());
@@ -998,7 +1060,7 @@ TEST(Reader, memoryUse) {
// Binary column
cols.clear();
- cols.push_back(8);
+ cols.push_back(7);
opts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
EXPECT_EQ(835906, reader->getMemoryUse());
@@ -1008,7 +1070,7 @@ TEST(Reader, memoryUse) {
// String column
cols.clear();
- cols.push_back(9);
+ cols.push_back(8);
opts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
EXPECT_EQ(901442, reader->getMemoryUse());
@@ -1018,7 +1080,7 @@ TEST(Reader, memoryUse) {
// Struct column (with a List subcolumn)
cols.clear();
- cols.push_back(10);
+ cols.push_back(9);
opts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
EXPECT_EQ(1294658, reader->getMemoryUse());
@@ -1028,7 +1090,7 @@ TEST(Reader, memoryUse) {
// List column
cols.clear();
- cols.push_back(11);
+ cols.push_back(10);
opts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
EXPECT_EQ(1229122, reader->getMemoryUse());
@@ -1038,7 +1100,7 @@ TEST(Reader, memoryUse) {
// Map column
cols.clear();
- cols.push_back(12);
+ cols.push_back(11);
opts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
EXPECT_EQ(1491266, reader->getMemoryUse());
@@ -1048,7 +1110,9 @@ TEST(Reader, memoryUse) {
// All columns
cols.clear();
- cols.push_back(0);
+ for(uint64_t c=0; c < 12; ++c) {
+ cols.push_back(c);
+ }
opts.include(cols);
reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
EXPECT_EQ(4112706, reader->getMemoryUse());
[2/2] orc git commit: Fixed ORC-29: Enable ColumnPrinter to print
only specified columns. (asandryh and omalley)
Posted by om...@apache.org.
Fixed ORC-29: Enable ColumnPrinter to print only specified columns.
(asandryh and omalley)
Signed-off-by: Owen O'Malley <om...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/3945f066
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/3945f066
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/3945f066
Branch: refs/heads/master
Commit: 3945f0663517b2d67d14c09ddb5990e3b569ffea
Parents: b39302f
Author: Aliaksei Sandryhaila <al...@hp.com>
Authored: Wed Jan 6 12:50:52 2016 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Wed Jan 6 12:55:43 2016 -0800
----------------------------------------------------------------------
c++/include/CMakeLists.txt | 1 +
c++/include/orc/ColumnPrinter.hh | 5 +-
c++/include/orc/Reader.hh | 64 ++++--
c++/include/orc/Type.hh | 105 ++++++++++
c++/include/orc/Vector.hh | 75 -------
c++/src/ColumnPrinter.cc | 228 +++++++++++----------
c++/src/ColumnReader.cc | 10 +-
c++/src/ColumnReader.hh | 6 +-
c++/src/Reader.cc | 224 ++++++++++-----------
c++/src/TypeImpl.cc | 367 +++++++++++++++++++++++-----------
c++/src/TypeImpl.hh | 67 +++++--
c++/test/CMakeLists.txt | 1 +
c++/test/TestColumnPrinter.cc | 36 ++--
c++/test/TestColumnReader.cc | 225 +++++++++------------
c++/test/TestType.cc | 277 +++++++++++++++++++++++++
tools/src/FileContents.cc | 30 ++-
tools/src/FileMemory.cc | 4 +-
tools/src/FileScan.cc | 4 -
tools/test/TestReader.cc | 122 ++++++++---
19 files changed, 1186 insertions(+), 665 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/include/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/c++/include/CMakeLists.txt b/c++/include/CMakeLists.txt
index 474c733..3891e71 100644
--- a/c++/include/CMakeLists.txt
+++ b/c++/include/CMakeLists.txt
@@ -77,6 +77,7 @@ install(FILES
"orc/MemoryPool.hh"
"orc/OrcFile.hh"
"orc/Reader.hh"
+ "orc/Type.hh"
"orc/Vector.hh"
DESTINATION "include/orc"
)
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/include/orc/ColumnPrinter.hh
----------------------------------------------------------------------
diff --git a/c++/include/orc/ColumnPrinter.hh b/c++/include/orc/ColumnPrinter.hh
index 17c1901..aa19214 100644
--- a/c++/include/orc/ColumnPrinter.hh
+++ b/c++/include/orc/ColumnPrinter.hh
@@ -34,12 +34,11 @@ namespace orc {
class ColumnPrinter {
protected:
std::string &buffer;
- const Type& type;
bool hasNulls ;
const char* notNull;
public:
- ColumnPrinter(std::string&, const Type&);
+ ColumnPrinter(std::string&);
virtual ~ColumnPrinter();
virtual void printRow(uint64_t rowId) = 0;
// should be called once at the start of each batch of rows
@@ -47,6 +46,6 @@ namespace orc {
};
ORC_UNIQUE_PTR<ColumnPrinter> createColumnPrinter(std::string&,
- const Type& type);
+ const Type* type);
}
#endif
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/include/orc/Reader.hh
----------------------------------------------------------------------
diff --git a/c++/include/orc/Reader.hh b/c++/include/orc/Reader.hh
index 316867d..d924fbf 100644
--- a/c++/include/orc/Reader.hh
+++ b/c++/include/orc/Reader.hh
@@ -20,7 +20,8 @@
#define ORC_READER_HH
#include "orc/orc-config.hh"
-#include "Vector.hh"
+#include "orc/Type.hh"
+#include "orc/Vector.hh"
#include <memory>
#include <string>
@@ -411,22 +412,23 @@ namespace orc {
virtual ~ReaderOptions();
/**
- * Set the list of columns to read. All columns that are children of
- * selected columns are automatically selected. The default value is
- * {0}.
- * @param include a list of columns to read
+ * For files that have structs as the top-level object, select the fields
+ * to read. The first field is 0, the second 1, and so on. By default,
+ * all columns are read. This option clears any previous setting of
+ * the selected columns.
+ * @param include a list of fields to read
* @return this
*/
- ReaderOptions& include(const std::list<int64_t>& include);
+ ReaderOptions& include(const std::list<uint64_t>& include);
/**
- * Set the list of columns to read. All columns that are children of
- * selected columns are automatically selected. The default value is
- * {0}.
- * @param include a list of columns to read
+ * For files that have structs as the top-level object, select the fields
+ * to read by name. By default, all columns are read. This option clears
+ * any previous setting of the selected columns.
+ * @param include a list of fields to read
* @return this
*/
- ReaderOptions& include(std::vector<int64_t> include);
+ ReaderOptions& include(const std::list<std::string>& include);
/**
* Set the section of the file to process.
@@ -493,10 +495,26 @@ namespace orc {
ReaderOptions& setMemoryPool(MemoryPool& pool);
/**
+ * Were the include indexes set?
+ */
+ bool getIndexesSet() const;
+
+ /**
* Get the list of selected columns to read. All children of the selected
* columns are also selected.
*/
- const std::list<int64_t>& getInclude() const;
+ const std::list<uint64_t>& getInclude() const;
+
+ /**
+ * Were the include names set?
+ */
+ bool getNamesSet() const;
+
+ /**
+ * Get the list of selected columns to read. All children of the selected
+ * columns are also selected.
+ */
+ const std::list<std::string>& getIncludeNames() const;
/**
* Get the start of the range for the data being processed.
@@ -652,12 +670,23 @@ namespace orc {
getColumnStatistics(uint32_t columnId) const = 0;
/**
- * Get the type of the rows in the file. The top level is always a struct.
+ * Get the type of the rows in the file. The top level is typically a
+ * struct.
* @return the root type
*/
virtual const Type& getType() const = 0;
/**
+ * Get the selected type of the rows in the file. The file's row type
+ * is projected down to just the selected columns. Thus, if the file's
+ * type is struct<col0:int,col1:double,col2:string> and the selected
+ * columns are "col0,col2" the selected type would be
+ * struct<col0:int,col2:string>.
+ * @return the root of the selected (projected) type
+ */
+ virtual const Type& getSelectedType() const = 0;
+
+ /**
* Get the selected columns of the file.
*/
virtual const std::vector<bool> getSelectedColumns() const = 0;
@@ -667,8 +696,8 @@ namespace orc {
* @param size the number of rows to read
* @return a new ColumnVectorBatch to read into
*/
- virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch
- (uint64_t size) const = 0;
+ virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch(uint64_t size
+ ) const = 0;
/**
* Read the next row batch from the current position.
@@ -713,9 +742,10 @@ namespace orc {
/**
* Estimate an upper bound on heap memory allocation by the Reader
* based on the information in the file footer.
- * The bound is less tight if only few columns are read or compression is used.
+ * The bound is less tight if only few columns are read or compression is
+ * used.
* @param stripeIx index of the stripe to be read (if not specified,
- * all stripes are considered).
+ * all stripes are considered).
* @return upper bound on memory use
*/
virtual uint64_t getMemoryUse(int stripeIx=-1) = 0;
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/include/orc/Type.hh
----------------------------------------------------------------------
diff --git a/c++/include/orc/Type.hh b/c++/include/orc/Type.hh
new file mode 100644
index 0000000..25b8f53
--- /dev/null
+++ b/c++/include/orc/Type.hh
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ORC_TYPE_HH
+#define ORC_TYPE_HH
+
+#include "orc/orc-config.hh"
+#include "orc/Vector.hh"
+#include "MemoryPool.hh"
+
+namespace orc {
+
+ enum TypeKind {
+ BOOLEAN = 0,
+ BYTE = 1,  // rendered as "tinyint" in the TestType toString() expectations
+ SHORT = 2,  // rendered as "smallint"
+ INT = 3,  // rendered as "int"
+ LONG = 4,  // rendered as "bigint"
+ FLOAT = 5,  // rendered as "float"
+ DOUBLE = 6,  // rendered as "double"
+ STRING = 7,  // rendered as "string"
+ BINARY = 8,
+ TIMESTAMP = 9,
+ LIST = 10,
+ MAP = 11,
+ STRUCT = 12,
+ UNION = 13,
+ DECIMAL = 14,
+ DATE = 15,
+ VARCHAR = 16,  // rendered as "varchar(<maxLength>)"
+ CHAR = 17  // rendered as "char(<maxLength>)"
+ };
+
+ class Type {
+ public:
+ virtual ~Type();
+ virtual uint64_t getColumnId() const = 0;
+ virtual uint64_t getMaximumColumnId() const = 0;  // NOTE(review): TestType expects the highest id in this type's subtree, unchanged by column selection - confirm
+ virtual TypeKind getKind() const = 0;
+ virtual uint64_t getSubtypeCount() const = 0;
+ virtual const Type* getSubtype(uint64_t childId) const = 0;
+ virtual const std::string& getFieldName(uint64_t childId) const = 0;
+ virtual uint64_t getMaximumLength() const = 0;
+ virtual uint64_t getPrecision() const = 0;
+ virtual uint64_t getScale() const = 0;
+ virtual std::string toString() const = 0;
+
+ /**
+ * Create a row batch for this type.
+ */
+ virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch(uint64_t size,
+ MemoryPool& pool
+ ) const = 0;
+
+ /**
+ * Add a new field to a struct type.
+ * @param fieldName the name of the new field
+ * @param fieldType the type of the new field
+ * @return a pointer to the struct type
+ */
+ virtual Type* addStructField(const std::string& fieldName,
+ ORC_UNIQUE_PTR<Type> fieldType) = 0;
+
+ /**
+ * Add a new child to a union type.
+ * @param fieldType the type of the new field
+ * @return a pointer to the union type
+ */
+ virtual Type* addUnionChild(ORC_UNIQUE_PTR<Type> fieldType) = 0;
+ };
+
+ const int64_t DEFAULT_DECIMAL_SCALE = 18;
+ const int64_t DEFAULT_DECIMAL_PRECISION = 38;
+
+ ORC_UNIQUE_PTR<Type> createPrimitiveType(TypeKind kind);
+ ORC_UNIQUE_PTR<Type> createCharType(TypeKind kind,
+ uint64_t maxLength);
+ ORC_UNIQUE_PTR<Type>
+ createDecimalType(uint64_t precision=
+ DEFAULT_DECIMAL_PRECISION,
+ uint64_t scale=DEFAULT_DECIMAL_SCALE);
+
+ ORC_UNIQUE_PTR<Type> createStructType();
+ ORC_UNIQUE_PTR<Type> createListType(ORC_UNIQUE_PTR<Type> elements);
+ ORC_UNIQUE_PTR<Type> createMapType(ORC_UNIQUE_PTR<Type> key,
+ ORC_UNIQUE_PTR<Type> value);
+ ORC_UNIQUE_PTR<Type> createUnionType();
+
+}
+#endif
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/include/orc/Vector.hh
----------------------------------------------------------------------
diff --git a/c++/include/orc/Vector.hh b/c++/include/orc/Vector.hh
index 8037400..8f6a0da 100644
--- a/c++/include/orc/Vector.hh
+++ b/c++/include/orc/Vector.hh
@@ -33,81 +33,6 @@
namespace orc {
- enum TypeKind {
- BOOLEAN = 0,
- BYTE = 1,
- SHORT = 2,
- INT = 3,
- LONG = 4,
- FLOAT = 5,
- DOUBLE = 6,
- STRING = 7,
- BINARY = 8,
- TIMESTAMP = 9,
- LIST = 10,
- MAP = 11,
- STRUCT = 12,
- UNION = 13,
- DECIMAL = 14,
- DATE = 15,
- VARCHAR = 16,
- CHAR = 17
- };
-
- std::string kind2String(TypeKind t);
-
- class Type {
- public:
- virtual ~Type();
- virtual int64_t assignIds(int64_t root) = 0;
- virtual int64_t getColumnId() const = 0;
- virtual TypeKind getKind() const = 0;
- virtual uint64_t getSubtypeCount() const = 0;
- virtual const Type& getSubtype(uint64_t typeId) const = 0;
- virtual const std::string& getFieldName(uint64_t fieldId) const = 0;
- virtual uint64_t getMaximumLength() const = 0;
- virtual uint64_t getPrecision() const = 0;
- virtual uint64_t getScale() const = 0;
- virtual std::string toString() const = 0;
-
- /**
- * Add a new field to a struct type.
- * @param fieldType the type of the new field
- * @param fieldName the name of the new field
- * @return a reference to the field's type
- */
- virtual Type& addStructField(ORC_UNIQUE_PTR<Type> fieldType,
- const std::string& fieldName) = 0;
- };
-
- const int64_t DEFAULT_DECIMAL_SCALE = 18;
- const int64_t DEFAULT_DECIMAL_PRECISION = 38;
-
- ORC_UNIQUE_PTR<Type> createPrimitiveType(TypeKind kind);
- ORC_UNIQUE_PTR<Type> createCharType(TypeKind kind,
- uint64_t maxLength);
- ORC_UNIQUE_PTR<Type>
- createDecimalType(uint64_t precision=
- DEFAULT_DECIMAL_PRECISION,
- uint64_t scale=DEFAULT_DECIMAL_SCALE);
-
- ORC_UNIQUE_PTR<Type> createStructType();
- ORC_UNIQUE_PTR<Type>
- createStructType(std::vector<Type*> types,
- std::vector<std::string> fieldNames);
-
-#ifdef ORC_CXX_HAS_INITIALIZER_LIST
- std::unique_ptr<Type> createStructType(
- std::initializer_list<std::unique_ptr<Type> > types,
- std::initializer_list<std::string> fieldNames);
-#endif
-
- ORC_UNIQUE_PTR<Type> createListType(ORC_UNIQUE_PTR<Type> elements);
- ORC_UNIQUE_PTR<Type> createMapType(ORC_UNIQUE_PTR<Type> key,
- ORC_UNIQUE_PTR<Type> value);
- ORC_UNIQUE_PTR<Type>
- createUnionType(std::vector<Type*> types);
-
/**
* The base class for each of the column vectors. This class handles
* the generic attributes such as number of elements, capacity, and
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/src/ColumnPrinter.cc
----------------------------------------------------------------------
diff --git a/c++/src/ColumnPrinter.cc b/c++/src/ColumnPrinter.cc
index aa90be6..764da01 100644
--- a/c++/src/ColumnPrinter.cc
+++ b/c++/src/ColumnPrinter.cc
@@ -33,11 +33,19 @@
namespace orc {
+ class VoidColumnPrinter: public ColumnPrinter {  // NOTE(review): presumably the printer for unselected ("void") subtypes - confirm in createColumnPrinter
+ public:
+ VoidColumnPrinter(std::string&);  // takes only the output buffer; no Type argument
+ ~VoidColumnPrinter() {}
+ void printRow(uint64_t rowId) override;
+ void reset(const ColumnVectorBatch& batch) override;
+ };
+
class BooleanColumnPrinter: public ColumnPrinter {
private:
const int64_t* data;
public:
- BooleanColumnPrinter(std::string&, const Type&);
+ BooleanColumnPrinter(std::string&);
~BooleanColumnPrinter() {}
void printRow(uint64_t rowId) override;
void reset(const ColumnVectorBatch& batch) override;
@@ -47,7 +55,7 @@ namespace orc {
private:
const int64_t* data;
public:
- LongColumnPrinter(std::string&, const Type&);
+ LongColumnPrinter(std::string&);
~LongColumnPrinter() {}
void printRow(uint64_t rowId) override;
void reset(const ColumnVectorBatch& batch) override;
@@ -59,7 +67,7 @@ namespace orc {
const bool isFloat;
public:
- DoubleColumnPrinter(std::string&, const Type&);
+ DoubleColumnPrinter(std::string&, const Type& type);
virtual ~DoubleColumnPrinter() {}
void printRow(uint64_t rowId) override;
void reset(const ColumnVectorBatch& batch) override;
@@ -71,7 +79,7 @@ namespace orc {
const int64_t* nanoseconds;
public:
- TimestampColumnPrinter(std::string&, const Type&);
+ TimestampColumnPrinter(std::string&);
~TimestampColumnPrinter() {}
void printRow(uint64_t rowId) override;
void reset(const ColumnVectorBatch& batch) override;
@@ -82,7 +90,7 @@ namespace orc {
const int64_t* data;
public:
- DateColumnPrinter(std::string&, const Type& type);
+ DateColumnPrinter(std::string&);
~DateColumnPrinter() {}
void printRow(uint64_t rowId) override;
void reset(const ColumnVectorBatch& batch) override;
@@ -93,7 +101,7 @@ namespace orc {
const int64_t* data;
int32_t scale;
public:
- Decimal64ColumnPrinter(std::string&, const Type& type);
+ Decimal64ColumnPrinter(std::string&);
~Decimal64ColumnPrinter() {}
void printRow(uint64_t rowId) override;
void reset(const ColumnVectorBatch& batch) override;
@@ -104,7 +112,7 @@ namespace orc {
const Int128* data;
int32_t scale;
public:
- Decimal128ColumnPrinter(std::string&, const Type& type);
+ Decimal128ColumnPrinter(std::string&);
~Decimal128ColumnPrinter() {}
void printRow(uint64_t rowId) override;
void reset(const ColumnVectorBatch& batch) override;
@@ -115,7 +123,7 @@ namespace orc {
const char* const * start;
const int64_t* length;
public:
- StringColumnPrinter(std::string&, const Type& type);
+ StringColumnPrinter(std::string&);
virtual ~StringColumnPrinter() {}
void printRow(uint64_t rowId) override;
void reset(const ColumnVectorBatch& batch) override;
@@ -126,7 +134,7 @@ namespace orc {
const char* const * start;
const int64_t* length;
public:
- BinaryColumnPrinter(std::string&, const Type& type);
+ BinaryColumnPrinter(std::string&);
virtual ~BinaryColumnPrinter() {}
void printRow(uint64_t rowId) override;
void reset(const ColumnVectorBatch& batch) override;
@@ -173,6 +181,7 @@ namespace orc {
class StructColumnPrinter: public ColumnPrinter {
private:
std::vector<ColumnPrinter*> fieldPrinter;
+ std::vector<std::string> fieldNames;
public:
StructColumnPrinter(std::string&, const Type& type);
virtual ~StructColumnPrinter();
@@ -189,9 +198,8 @@ namespace orc {
file.append(ptr, len);
}
- ColumnPrinter::ColumnPrinter(std::string& _buffer, const Type& _type
- ): buffer(_buffer),
- type(_type) {
+ ColumnPrinter::ColumnPrinter(std::string& _buffer
+ ): buffer(_buffer) {
notNull = nullptr;
hasNulls = false;
}
@@ -210,76 +218,92 @@ namespace orc {
}
std::unique_ptr<ColumnPrinter> createColumnPrinter(std::string& buffer,
- const Type& type) {
- ColumnPrinter *result;
- switch(static_cast<int64_t>(type.getKind())) {
- case BOOLEAN:
- result = new BooleanColumnPrinter(buffer, type);
- break;
-
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- result = new LongColumnPrinter(buffer, type);
- break;
-
- case FLOAT:
- case DOUBLE:
- result = new DoubleColumnPrinter(buffer, type);
- break;
-
- case STRING:
- case VARCHAR :
- case CHAR:
- result = new StringColumnPrinter(buffer, type);
- break;
-
- case BINARY:
- result = new BinaryColumnPrinter(buffer, type);
- break;
-
- case TIMESTAMP:
- result = new TimestampColumnPrinter(buffer, type);
- break;
-
- case LIST:
- result = new ListColumnPrinter(buffer, type);
- break;
-
- case MAP:
- result = new MapColumnPrinter(buffer, type);
- break;
-
- case STRUCT:
- result = new StructColumnPrinter(buffer, type);
- break;
-
- case DECIMAL:
- if (type.getPrecision() == 0 || type.getPrecision() > 18) {
- result = new Decimal128ColumnPrinter(buffer, type);
- } else {
- result = new Decimal64ColumnPrinter(buffer, type);
- }
- break;
+ const Type* type) {
+ ColumnPrinter *result = nullptr;
+ if (type == nullptr) {
+ result = new VoidColumnPrinter(buffer);
+ } else {
+ switch(static_cast<int64_t>(type->getKind())) {
+ case BOOLEAN:
+ result = new BooleanColumnPrinter(buffer);
+ break;
+
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ result = new LongColumnPrinter(buffer);
+ break;
+
+ case FLOAT:
+ case DOUBLE:
+ result = new DoubleColumnPrinter(buffer, *type);
+ break;
+
+ case STRING:
+ case VARCHAR :
+ case CHAR:
+ result = new StringColumnPrinter(buffer);
+ break;
+
+ case BINARY:
+ result = new BinaryColumnPrinter(buffer);
+ break;
+
+ case TIMESTAMP:
+ result = new TimestampColumnPrinter(buffer);
+ break;
+
+ case LIST:
+ result = new ListColumnPrinter(buffer, *type);
+ break;
+
+ case MAP:
+ result = new MapColumnPrinter(buffer, *type);
+ break;
+
+ case STRUCT:
+ result = new StructColumnPrinter(buffer, *type);
+ break;
+
+ case DECIMAL:
+ if (type->getPrecision() == 0 || type->getPrecision() > 18) {
+ result = new Decimal128ColumnPrinter(buffer);
+ } else {
+ result = new Decimal64ColumnPrinter(buffer);
+ }
+ break;
- case DATE:
- result = new DateColumnPrinter(buffer, type);
- break;
+ case DATE:
+ result = new DateColumnPrinter(buffer);
+ break;
- case UNION:
- result = new UnionColumnPrinter(buffer, type);
- break;
+ case UNION:
+ result = new UnionColumnPrinter(buffer, *type);
+ break;
- default:
- throw std::logic_error("unknown batch type");
+ default:
+ throw std::logic_error("unknown batch type");
+ }
}
return std::unique_ptr<ColumnPrinter>(result);
}
- LongColumnPrinter::LongColumnPrinter(std::string& buffer,
- const Type& type
- ): ColumnPrinter(buffer, type),
+ VoidColumnPrinter::VoidColumnPrinter(std::string& buffer
+ ): ColumnPrinter(buffer) {
+ // PASS
+ }
+
+ void VoidColumnPrinter::reset(const ColumnVectorBatch&) {
+ // PASS
+ }
+
+ void VoidColumnPrinter::printRow(uint64_t) {
+ writeString(buffer, "null");
+ }
+
+ LongColumnPrinter::LongColumnPrinter(std::string& buffer
+ ): ColumnPrinter(buffer),
data(nullptr) {
// PASS
}
@@ -302,7 +326,7 @@ namespace orc {
DoubleColumnPrinter::DoubleColumnPrinter(std::string& buffer,
const Type& type
- ): ColumnPrinter(buffer, type),
+ ): ColumnPrinter(buffer),
data(nullptr),
isFloat(type.getKind() == FLOAT){
// PASS
@@ -324,10 +348,8 @@ namespace orc {
}
}
- Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& buffer,
- const Type& type
- ): ColumnPrinter(buffer,
- type),
+ Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& buffer
+ ): ColumnPrinter(buffer),
data(nullptr),
scale(0) {
// PASS
@@ -376,10 +398,8 @@ namespace orc {
}
}
- Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& buffer,
- const Type& type
- ): ColumnPrinter(buffer,
- type),
+ Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& buffer
+ ): ColumnPrinter(buffer),
data(nullptr),
scale(0) {
// PASS
@@ -399,9 +419,8 @@ namespace orc {
}
}
- StringColumnPrinter::StringColumnPrinter(std::string& buffer,
- const Type& type
- ): ColumnPrinter(buffer, type),
+ StringColumnPrinter::StringColumnPrinter(std::string& buffer
+ ): ColumnPrinter(buffer),
start(nullptr),
length(nullptr) {
// PASS
@@ -453,7 +472,7 @@ namespace orc {
ListColumnPrinter::ListColumnPrinter(std::string& buffer,
const Type& type
- ): ColumnPrinter(buffer, type),
+ ): ColumnPrinter(buffer),
offsets(nullptr) {
elementPrinter = createColumnPrinter(buffer, type.getSubtype(0));
}
@@ -482,7 +501,7 @@ namespace orc {
MapColumnPrinter::MapColumnPrinter(std::string& buffer,
const Type& type
- ): ColumnPrinter(buffer, type),
+ ): ColumnPrinter(buffer),
offsets(nullptr) {
keyPrinter = createColumnPrinter(buffer, type.getSubtype(0));
elementPrinter = createColumnPrinter(buffer, type.getSubtype(1));
@@ -517,7 +536,7 @@ namespace orc {
UnionColumnPrinter::UnionColumnPrinter(std::string& buffer,
const Type& type
- ): ColumnPrinter(buffer, type),
+ ): ColumnPrinter(buffer),
tags(nullptr),
offsets(nullptr) {
for(unsigned int i=0; i < type.getSubtypeCount(); ++i) {
@@ -560,9 +579,11 @@ namespace orc {
StructColumnPrinter::StructColumnPrinter(std::string& buffer,
const Type& type
- ): ColumnPrinter(buffer, type) {
+ ): ColumnPrinter(buffer) {
for(unsigned int i=0; i < type.getSubtypeCount(); ++i) {
- fieldPrinter.push_back(createColumnPrinter(buffer, type.getSubtype(i))
+ fieldNames.push_back(type.getFieldName(i));
+ fieldPrinter.push_back(createColumnPrinter(buffer,
+ type.getSubtype(i))
.release());
}
}
@@ -592,7 +613,7 @@ namespace orc {
writeString(buffer, ", ");
}
writeChar(buffer, '"');
- writeString(buffer, type.getFieldName(i).c_str());
+ writeString(buffer, fieldNames[i].c_str());
writeString(buffer, "\": ");
fieldPrinter[i]->printRow(rowId);
}
@@ -600,9 +621,8 @@ namespace orc {
}
}
- DateColumnPrinter::DateColumnPrinter(std::string& buffer,
- const Type& type
- ): ColumnPrinter(buffer, type),
+ DateColumnPrinter::DateColumnPrinter(std::string& buffer
+ ): ColumnPrinter(buffer),
data(nullptr) {
// PASS
}
@@ -627,9 +647,8 @@ namespace orc {
data = dynamic_cast<const LongVectorBatch&>(batch).data.data();
}
- BooleanColumnPrinter::BooleanColumnPrinter(std::string& buffer,
- const Type& type
- ): ColumnPrinter(buffer, type),
+ BooleanColumnPrinter::BooleanColumnPrinter(std::string& buffer
+ ): ColumnPrinter(buffer),
data(nullptr) {
// PASS
}
@@ -647,9 +666,8 @@ namespace orc {
data = dynamic_cast<const LongVectorBatch&>(batch).data.data();
}
- BinaryColumnPrinter::BinaryColumnPrinter(std::string& buffer,
- const Type& type
- ): ColumnPrinter(buffer, type),
+ BinaryColumnPrinter::BinaryColumnPrinter(std::string& buffer
+ ): ColumnPrinter(buffer),
start(nullptr),
length(nullptr) {
// PASS
@@ -679,10 +697,8 @@ namespace orc {
length = dynamic_cast<const StringVectorBatch&>(batch).length.data();
}
- TimestampColumnPrinter::TimestampColumnPrinter(std::string& buffer,
- const Type& type
- ): ColumnPrinter(buffer,
- type),
+ TimestampColumnPrinter::TimestampColumnPrinter(std::string& buffer
+ ): ColumnPrinter(buffer),
seconds(nullptr),
nanoseconds(nullptr) {
// PASS
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/src/ColumnReader.cc
----------------------------------------------------------------------
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index 84e6db2..ae4d9b6 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -769,7 +769,7 @@ namespace orc {
switch (static_cast<int64_t>(stripe.getEncoding(columnId).kind())) {
case proto::ColumnEncoding_Kind_DIRECT:
for(unsigned int i=0; i < type.getSubtypeCount(); ++i) {
- const Type& child = type.getSubtype(i);
+ const Type& child = *type.getSubtype(i);
if (selectedColumns[static_cast<uint64_t>(child.getColumnId())]) {
children.push_back(buildReader(child, stripe).release());
}
@@ -836,7 +836,7 @@ namespace orc {
proto::Stream_Kind_LENGTH,
true),
false, vers, memoryPool);
- const Type& childType = type.getSubtype(0);
+ const Type& childType = *type.getSubtype(0);
if (selectedColumns[static_cast<uint64_t>(childType.getColumnId())]) {
child = buildReader(childType, stripe);
}
@@ -929,11 +929,11 @@ namespace orc {
proto::Stream_Kind_LENGTH,
true),
false, vers, memoryPool);
- const Type& keyType = type.getSubtype(0);
+ const Type& keyType = *type.getSubtype(0);
if (selectedColumns[static_cast<uint64_t>(keyType.getColumnId())]) {
keyReader = buildReader(keyType, stripe);
}
- const Type& elementType = type.getSubtype(1);
+ const Type& elementType = *type.getSubtype(1);
if (selectedColumns[static_cast<uint64_t>(elementType.getColumnId())]) {
elementReader = buildReader(elementType, stripe);
}
@@ -1040,7 +1040,7 @@ namespace orc {
// figure out which types are selected
const std::vector<bool> selectedColumns = stripe.getSelectedColumns();
for(unsigned int i=0; i < numChildren; ++i) {
- const Type &child = type.getSubtype(i);
+ const Type &child = *type.getSubtype(i);
if (selectedColumns[static_cast<size_t>(child.getColumnId())]) {
childrenReader[i] = buildReader(child, stripe).release();
}
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/src/ColumnReader.hh
----------------------------------------------------------------------
diff --git a/c++/src/ColumnReader.hh b/c++/src/ColumnReader.hh
index 73db911..142d41e 100644
--- a/c++/src/ColumnReader.hh
+++ b/c++/src/ColumnReader.hh
@@ -45,7 +45,7 @@ namespace orc {
/**
* Get the encoding for the given column for this stripe.
*/
- virtual proto::ColumnEncoding getEncoding(int64_t columnId) const = 0;
+ virtual proto::ColumnEncoding getEncoding(uint64_t columnId) const = 0;
/**
* Get the stream for the given column/kind in this stripe.
@@ -55,7 +55,7 @@ namespace orc {
* @return the new stream
*/
virtual std::unique_ptr<SeekableInputStream>
- getStream(int64_t columnId,
+ getStream(uint64_t columnId,
proto::Stream_Kind kind,
bool shouldStream) const = 0;
@@ -78,7 +78,7 @@ namespace orc {
class ColumnReader {
protected:
std::unique_ptr<ByteRleDecoder> notNullDecoder;
- int64_t columnId;
+ uint64_t columnId;
MemoryPool& memoryPool;
public:
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/src/Reader.cc
----------------------------------------------------------------------
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 58f441c..940ef16 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -39,7 +39,10 @@
namespace orc {
struct ReaderOptionsPrivate {
- std::list<int64_t> includedColumns;
+ bool setIndexes;
+ bool setNames;
+ std::list<uint64_t> includedColumnIndexes;
+ std::list<std::string> includedColumnNames;
uint64_t dataStart;
uint64_t dataLength;
uint64_t tailLocation;
@@ -50,7 +53,8 @@ namespace orc {
std::string serializedTail;
ReaderOptionsPrivate() {
- includedColumns.assign(1,0);
+ setIndexes = false;
+ setNames = false;
dataStart = 0;
dataLength = std::numeric_limits<uint64_t>::max();
tailLocation = std::numeric_limits<uint64_t>::max();
@@ -91,13 +95,20 @@ namespace orc {
// PASS
}
- ReaderOptions& ReaderOptions::include(const std::list<int64_t>& include) {
- privateBits->includedColumns.assign(include.begin(), include.end());
+ ReaderOptions& ReaderOptions::include(const std::list<uint64_t>& include) {
+ privateBits->setIndexes = true;
+ privateBits->includedColumnIndexes.assign(include.begin(), include.end());
+ privateBits->setNames = false;
+ privateBits->includedColumnNames.clear();
return *this;
}
- ReaderOptions& ReaderOptions::include(std::vector<int64_t> include) {
- privateBits->includedColumns.assign(include.begin(), include.end());
+ ReaderOptions& ReaderOptions::include
+ (const std::list<std::string>& include) {
+ privateBits->setNames = true;
+ privateBits->includedColumnNames.assign(include.begin(), include.end());
+ privateBits->setIndexes = false;
+ privateBits->includedColumnIndexes.clear();
return *this;
}
@@ -128,8 +139,20 @@ namespace orc {
return privateBits->memoryPool;
}
- const std::list<int64_t>& ReaderOptions::getInclude() const {
- return privateBits->includedColumns;
+ bool ReaderOptions::getIndexesSet() const {
+ return privateBits->setIndexes;
+ }
+
+ const std::list<uint64_t>& ReaderOptions::getInclude() const {
+ return privateBits->includedColumnIndexes;
+ }
+
+ bool ReaderOptions::getNamesSet() const {
+ return privateBits->setNames;
+ }
+
+ const std::list<std::string>& ReaderOptions::getIncludeNames() const {
+ return privateBits->includedColumnNames;
}
uint64_t ReaderOptions::getOffset() const {
@@ -875,6 +898,7 @@ namespace orc {
DataBuffer<uint64_t> firstRowOfStripe;
uint64_t numberOfStripes;
std::unique_ptr<Type> schema;
+ mutable std::unique_ptr<Type> selectedSchema;
// metadata
mutable std::unique_ptr<proto::Metadata> metadata;
@@ -897,9 +921,8 @@ namespace orc {
void checkOrcVersion();
void selectType(const Type& type);
void readMetadata() const;
- std::unique_ptr<ColumnVectorBatch> createRowBatch(const Type& type,
- uint64_t capacity
- ) const;
+ void updateSelected(const std::list<uint64_t>& fieldIds);
+ void updateSelected(const std::list<std::string>& fieldNames);
public:
/**
@@ -956,6 +979,8 @@ namespace orc {
const Type& getType() const override;
+ const Type& getSelectedType() const override;
+
const std::vector<bool> getSelectedColumns() const override;
std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size
@@ -1062,30 +1087,23 @@ namespace orc {
}
schema = convertType(footer->types(0), *footer);
- schema->assignIds(0);
selectedColumns.assign(static_cast<size_t>(footer->types_size()), false);
-
- const std::list<int64_t>& included = options.getInclude();
- for(std::list<int64_t>::const_iterator columnId = included.begin();
- columnId != included.end(); ++columnId) {
- if (*columnId == 0) {
- selectType(*(schema.get()));
- } else if (*columnId <=
- static_cast<int64_t>(schema->getSubtypeCount())) {
- selectType(schema->getSubtype(static_cast<uint64_t>(*columnId-1)));
- }
- }
- if (included.size() > 0) {
- selectedColumns[0] = true;
+ if (schema->getKind() == STRUCT && options.getIndexesSet()) {
+ updateSelected(options.getInclude());
+ } else if (schema->getKind() == STRUCT && options.getNamesSet()) {
+ updateSelected(options.getIncludeNames());
+ } else {
+ std::fill(selectedColumns.begin(), selectedColumns.end(), true);
}
+ selectedColumns[0] = true;
}
void ReaderImpl::selectType(const Type& type) {
if (!selectedColumns[static_cast<size_t>(type.getColumnId())]) {
selectedColumns[static_cast<size_t>(type.getColumnId())] = true;
for (uint64_t i=0; i < type.getSubtypeCount(); i++) {
- selectType(type.getSubtype(i));
+ selectType(*type.getSubtype(i));
}
}
}
@@ -1206,6 +1224,14 @@ namespace orc {
return *(schema.get());
}
+ const Type& ReaderImpl::getSelectedType() const {
+ if (selectedSchema.get() == nullptr) {
+ selectedSchema = buildSelectedType(schema.get(),
+ selectedColumns);
+ }
+ return *(selectedSchema.get());
+ }
+
uint64_t ReaderImpl::getRowNumber() const {
return previousRow;
}
@@ -1298,10 +1324,10 @@ namespace orc {
}
currentStripe = seekToStripe;
- currentRowInStripe = 0;
- std::unique_ptr<orc::ColumnVectorBatch> batch =
- createRowBatch(rowNumber-firstRowOfStripe[currentStripe]);
- next(*batch);
+ currentRowInStripe = rowNumber - firstRowOfStripe[currentStripe];
+ previousRow = rowNumber;
+ startNextStripe();
+ reader->skip(currentRowInStripe);
}
bool ReaderImpl::hasCorrectStatistics() const {
@@ -1353,10 +1379,11 @@ namespace orc {
virtual const std::vector<bool> getSelectedColumns() const override;
- virtual proto::ColumnEncoding getEncoding(int64_t columnId) const override;
+ virtual proto::ColumnEncoding getEncoding(uint64_t columnId
+ ) const override;
virtual std::unique_ptr<SeekableInputStream>
- getStream(int64_t columnId,
+ getStream(uint64_t columnId,
proto::Stream_Kind kind,
bool shouldStream) const override;
@@ -1497,7 +1524,7 @@ namespace orc {
return reader.getSelectedColumns();
}
- proto::ColumnEncoding StripeStreamsImpl::getEncoding(int64_t columnId
+ proto::ColumnEncoding StripeStreamsImpl::getEncoding(uint64_t columnId
) const {
return footer.columns(static_cast<int>(columnId));
}
@@ -1507,7 +1534,7 @@ namespace orc {
}
std::unique_ptr<SeekableInputStream>
- StripeStreamsImpl::getStream(int64_t columnId,
+ StripeStreamsImpl::getStream(uint64_t columnId,
proto::Stream_Kind kind,
bool shouldStream) const {
uint64_t offset = stripeStart;
@@ -1591,96 +1618,8 @@ namespace orc {
}
std::unique_ptr<ColumnVectorBatch> ReaderImpl::createRowBatch
- (const Type& type, uint64_t capacity) const {
- ColumnVectorBatch* result = nullptr;
- const Type* subtype;
- switch (static_cast<int64_t>(type.getKind())) {
- case BOOLEAN:
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case DATE:
- result = new LongVectorBatch(capacity, memoryPool);
- break;
- case FLOAT:
- case DOUBLE:
- result = new DoubleVectorBatch(capacity, memoryPool);
- break;
- case STRING:
- case BINARY:
- case CHAR:
- case VARCHAR:
- result = new StringVectorBatch(capacity, memoryPool);
- break;
- case TIMESTAMP:
- result = new TimestampVectorBatch(capacity, memoryPool);
- break;
- case STRUCT:
- {
- StructVectorBatch *structResult =
- new StructVectorBatch(capacity, memoryPool);
- result = structResult;
- for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
- subtype = &(type.getSubtype(i));
- if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
- structResult->fields.push_back(createRowBatch(*subtype,
- capacity).release());
- }
- }
- }
- break;
- case LIST:
- result = new ListVectorBatch(capacity, memoryPool);
- subtype = &(type.getSubtype(0));
- if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
- dynamic_cast<ListVectorBatch*>(result)->elements =
- createRowBatch(*subtype, capacity);
- }
- break;
- case MAP:
- result = new MapVectorBatch(capacity, memoryPool);
- subtype = &(type.getSubtype(0));
- if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
- dynamic_cast<MapVectorBatch*>(result)->keys =
- createRowBatch(*subtype, capacity);
- }
- subtype = &(type.getSubtype(1));
- if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
- dynamic_cast<MapVectorBatch*>(result)->elements =
- createRowBatch(*subtype, capacity);
- }
- break;
- case DECIMAL:
- if (type.getPrecision() == 0 || type.getPrecision() > 18) {
- result = new Decimal128VectorBatch(capacity, memoryPool);
- } else {
- result = new Decimal64VectorBatch(capacity, memoryPool);
- }
- break;
- case UNION:
- {
- UnionVectorBatch *unionResult =
- new UnionVectorBatch(capacity, memoryPool);
- result = unionResult;
- for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
- subtype = &(type.getSubtype(i));
- if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
- unionResult->children.push_back(createRowBatch(*subtype,
- capacity).release());
- }
- }
- }
- break;
- default:
- throw NotImplementedYet("not supported yet");
- }
- return std::unique_ptr<ColumnVectorBatch>(result);
- }
-
- std::unique_ptr<ColumnVectorBatch> ReaderImpl::createRowBatch
(uint64_t capacity) const {
- return createRowBatch(*(schema.get()), capacity);
+ return getSelectedType().createRowBatch(capacity, memoryPool);
}
void ensureOrcFooter(InputStream* stream,
@@ -2045,4 +1984,43 @@ namespace orc {
}
}
+ void ReaderImpl::updateSelected(const std::list<uint64_t>& fieldIds) {
+ uint64_t childCount = schema->getSubtypeCount();
+ for(std::list<uint64_t>::const_iterator i = fieldIds.begin();
+ i != fieldIds.end(); ++i) {
+ if (*i >= childCount) {
+ std::stringstream buffer;
+ buffer << "Invalid column selected " << *i << " out of "
+ << childCount;
+ throw ParseError(buffer.str());
+ }
+ const Type& child = *schema->getSubtype(*i);
+ for(size_t c = child.getColumnId();
+ c <= child.getMaximumColumnId(); ++c){
+ selectedColumns[c] = true;
+ }
+ }
+ }
+
+ void ReaderImpl::updateSelected(const std::list<std::string>& fieldNames) {
+ uint64_t childCount = schema->getSubtypeCount();
+ for(std::list<std::string>::const_iterator i = fieldNames.begin();
+ i != fieldNames.end(); ++i) {
+ bool foundMatch = false;
+ for(size_t field=0; field < childCount; ++field) {
+ if (schema->getFieldName(field) == *i) {
+ const Type& child = *schema->getSubtype(field);
+ for(size_t c = child.getColumnId();
+ c <= child.getMaximumColumnId(); ++c){
+ selectedColumns[c] = true;
+ }
+ foundMatch = true;
+ break;
+ }
+ }
+ if (!foundMatch) {
+ throw ParseError("Invalid column selected " + *i);
+ }
+ }
+ }
}// namespace
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/src/TypeImpl.cc
----------------------------------------------------------------------
diff --git a/c++/src/TypeImpl.cc b/c++/src/TypeImpl.cc
index 4d37d27..d3507b0 100644
--- a/c++/src/TypeImpl.cc
+++ b/c++/src/TypeImpl.cc
@@ -30,7 +30,9 @@ namespace orc {
}
TypeImpl::TypeImpl(TypeKind _kind) {
- columnId = 0;
+ parent = nullptr;
+ columnId = -1;
+ maximumColumnId = -1;
kind = _kind;
maxLength = 0;
precision = 0;
@@ -39,7 +41,9 @@ namespace orc {
}
TypeImpl::TypeImpl(TypeKind _kind, uint64_t _maxLength) {
- columnId = 0;
+ parent = nullptr;
+ columnId = -1;
+ maximumColumnId = -1;
kind = _kind;
maxLength = _maxLength;
precision = 0;
@@ -49,7 +53,9 @@ namespace orc {
TypeImpl::TypeImpl(TypeKind _kind, uint64_t _precision,
uint64_t _scale) {
- columnId = 0;
+ parent = nullptr;
+ columnId = -1;
+ maximumColumnId = -1;
kind = _kind;
maxLength = 0;
precision = _precision;
@@ -57,35 +63,13 @@ namespace orc {
subtypeCount = 0;
}
- TypeImpl::TypeImpl(TypeKind _kind,
- const std::vector<Type*>& types,
- const std::vector<std::string>& _fieldNames) {
- columnId = 0;
- kind = _kind;
- maxLength = 0;
- precision = 0;
- scale = 0;
- subtypeCount = static_cast<uint64_t>(types.size());
- subTypes.assign(types.begin(), types.end());
- fieldNames.assign(_fieldNames.begin(), _fieldNames.end());
- }
-
- TypeImpl::TypeImpl(TypeKind _kind, const std::vector<Type*>& types) {
- columnId = 0;
- kind = _kind;
- maxLength = 0;
- precision = 0;
- scale = 0;
- subtypeCount = static_cast<uint64_t>(types.size());
- subTypes.assign(types.begin(), types.end());
- }
-
- int64_t TypeImpl::assignIds(int64_t root) {
- columnId = root;
- int64_t current = root + 1;
+ uint64_t TypeImpl::assignIds(uint64_t root) const {
+ columnId = static_cast<int64_t>(root);
+ uint64_t current = root + 1;
for(uint64_t i=0; i < subtypeCount; ++i) {
- current = subTypes[i]->assignIds(current);
+ current = dynamic_cast<TypeImpl*>(subTypes[i])->assignIds(current);
}
+ maximumColumnId = static_cast<int64_t>(current) - 1;
return current;
}
@@ -96,8 +80,24 @@ namespace orc {
}
}
- int64_t TypeImpl::getColumnId() const {
- return columnId;
+ void TypeImpl::ensureIdAssigned() const {
+ if (columnId == -1) {
+ const TypeImpl* root = this;
+ while (root->parent != nullptr) {
+ root = root->parent;
+ }
+ root->assignIds(0);
+ }
+ }
+
+ uint64_t TypeImpl::getColumnId() const {
+ ensureIdAssigned();
+ return static_cast<uint64_t>(columnId);
+ }
+
+ uint64_t TypeImpl::getMaximumColumnId() const {
+ ensureIdAssigned();
+ return static_cast<uint64_t>(maximumColumnId);
}
TypeKind TypeImpl::getKind() const {
@@ -108,8 +108,8 @@ namespace orc {
return subtypeCount;
}
- const Type& TypeImpl::getSubtype(uint64_t i) const {
- return *(subTypes[i]);
+ const Type* TypeImpl::getSubtype(uint64_t i) const {
+ return subTypes[i];
}
const std::string& TypeImpl::getFieldName(uint64_t i) const {
@@ -128,13 +128,30 @@ namespace orc {
return scale;
}
- Type& TypeImpl::addStructField(std::unique_ptr<Type> fieldType,
- const std::string& fieldName) {
- Type* result = fieldType.release();
- subTypes.push_back(result);
- fieldNames.push_back(fieldName);
+ void TypeImpl::setIds(uint64_t _columnId, uint64_t _maxColumnId) {
+ columnId = static_cast<int64_t>(_columnId);
+ maximumColumnId = static_cast<int64_t>(_maxColumnId);
+ }
+
+ void TypeImpl::addChildType(std::unique_ptr<Type> childType) {
+ TypeImpl* child = dynamic_cast<TypeImpl*>(childType.release());
+ subTypes.push_back(child);
+ if (child != nullptr) {
+ child->parent = this;
+ }
subtypeCount += 1;
- return *result;
+ }
+
+ Type* TypeImpl::addStructField(const std::string& fieldName,
+ std::unique_ptr<Type> fieldType) {
+ addChildType(std::move(fieldType));
+ fieldNames.push_back(fieldName);
+ return this;
+ }
+
+ Type* TypeImpl::addUnionChild(std::unique_ptr<Type> fieldType) {
+ addChildType(std::move(fieldType));
+ return this;
}
std::string TypeImpl::toString() const {
@@ -160,10 +177,10 @@ namespace orc {
case TIMESTAMP:
return "timestamp";
case LIST:
- return "array<" + subTypes[0]->toString() + ">";
+ return "array<" + (subTypes[0] ? subTypes[0]->toString() : "void") + ">";
case MAP:
- return "map<" + subTypes[0]->toString() + "," +
- subTypes[1]->toString() + ">";
+ return "map<" + (subTypes[0] ? subTypes[0]->toString() : "void") + "," +
+ (subTypes[1] ? subTypes[1]->toString() : "void") + ">";
case STRUCT: {
std::string result = "struct<";
for(size_t i=0; i < subTypes.size(); ++i) {
@@ -210,6 +227,89 @@ namespace orc {
}
}
+ std::unique_ptr<ColumnVectorBatch>
+ TypeImpl::createRowBatch(uint64_t capacity,
+ MemoryPool& memoryPool) const {
+ switch (static_cast<int64_t>(kind)) {
+ case BOOLEAN:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case DATE:
+ return std::unique_ptr<ColumnVectorBatch>
+ (new LongVectorBatch(capacity, memoryPool));
+
+ case FLOAT:
+ case DOUBLE:
+ return std::unique_ptr<ColumnVectorBatch>
+ (new DoubleVectorBatch(capacity, memoryPool));
+
+ case STRING:
+ case BINARY:
+ case CHAR:
+ case VARCHAR:
+ return std::unique_ptr<ColumnVectorBatch>
+ (new StringVectorBatch(capacity, memoryPool));
+
+ case TIMESTAMP:
+ return std::unique_ptr<ColumnVectorBatch>
+ (new TimestampVectorBatch(capacity, memoryPool));
+
+ case STRUCT: {
+ StructVectorBatch *result = new StructVectorBatch(capacity, memoryPool);
+ for(uint64_t i=0; i < getSubtypeCount(); ++i) {
+ result->fields.push_back(getSubtype(i)->
+ createRowBatch(capacity,
+ memoryPool).release());
+ }
+ return std::unique_ptr<ColumnVectorBatch>(result);
+ }
+
+ case LIST: {
+ ListVectorBatch* result = new ListVectorBatch(capacity, memoryPool);
+ if (getSubtype(0) != nullptr) {
+ result->elements = getSubtype(0)->createRowBatch(capacity, memoryPool);
+ }
+ return std::unique_ptr<ColumnVectorBatch>(result);
+ }
+
+ case MAP: {
+ MapVectorBatch* result = new MapVectorBatch(capacity, memoryPool);
+ if (getSubtype(0) != nullptr) {
+ result->keys = getSubtype(0)->createRowBatch(capacity, memoryPool);
+ }
+ if (getSubtype(1) != nullptr) {
+ result->elements = getSubtype(1)->createRowBatch(capacity, memoryPool);
+ }
+ return std::unique_ptr<ColumnVectorBatch>(result);
+ }
+
+ case DECIMAL: {
+ if (getPrecision() == 0 || getPrecision() > 18) {
+ return std::unique_ptr<ColumnVectorBatch>
+ (new Decimal128VectorBatch(capacity, memoryPool));
+ } else {
+ return std::unique_ptr<ColumnVectorBatch>
+ (new Decimal64VectorBatch(capacity, memoryPool));
+ }
+ }
+
+ case UNION: {
+ UnionVectorBatch *result = new UnionVectorBatch(capacity, memoryPool);
+ for(uint64_t i=0; i < getSubtypeCount(); ++i) {
+ result->children.push_back(getSubtype(i)->createRowBatch(capacity,
+ memoryPool)
+ .release());
+ }
+ return std::unique_ptr<ColumnVectorBatch>(result);
+ }
+
+ default:
+ throw NotImplementedYet("not supported yet");
+ }
+ }
+
std::unique_ptr<Type> createPrimitiveType(TypeKind kind) {
return std::unique_ptr<Type>(new TypeImpl(kind));
}
@@ -228,55 +328,22 @@ namespace orc {
return std::unique_ptr<Type>(new TypeImpl(STRUCT));
}
- std::unique_ptr<Type>
- createStructType(std::vector<Type*> types,
- std::vector<std::string> fieldNames) {
- std::vector<Type*> typeVector(types.begin(), types.end());
- std::vector<std::string> fieldVector(fieldNames.begin(), fieldNames.end());
-
- return std::unique_ptr<Type>(new TypeImpl(STRUCT, typeVector,
- fieldVector));
- }
-
-#ifdef ORC_CXX_HAS_INITIALIZER_LIST
- std::unique_ptr<Type> createStructType(
- std::initializer_list<std::unique_ptr<Type> > types,
- std::initializer_list<std::string> fieldNames) {
- std::vector<Type*> typeVector(types.size());
- std::vector<std::string> fieldVector(types.size());
- auto currentType = types.begin();
- auto endType = types.end();
- size_t current = 0;
- while (currentType != endType) {
- typeVector[current++] =
- const_cast<std::unique_ptr<Type>*>(currentType)->release();
- ++currentType;
- }
- fieldVector.insert(fieldVector.end(), fieldNames.begin(),
- fieldNames.end());
- return std::unique_ptr<Type>(new TypeImpl(STRUCT, typeVector,
- fieldVector));
- }
-#endif
-
std::unique_ptr<Type> createListType(std::unique_ptr<Type> elements) {
- std::vector<Type*> subtypes(1);
- subtypes[0] = elements.release();
- return std::unique_ptr<Type>(new TypeImpl(LIST, subtypes));
+ TypeImpl* result = new TypeImpl(LIST);
+ result->addChildType(std::move(elements));
+ return std::unique_ptr<Type>(result);
}
std::unique_ptr<Type> createMapType(std::unique_ptr<Type> key,
std::unique_ptr<Type> value) {
- std::vector<Type*> subtypes(2);
- subtypes[0] = key.release();
- subtypes[1] = value.release();
- return std::unique_ptr<Type>(new TypeImpl(MAP, subtypes));
+ TypeImpl* result = new TypeImpl(MAP);
+ result->addChildType(std::move(key));
+ result->addChildType(std::move(value));
+ return std::unique_ptr<Type>(result);
}
- std::unique_ptr<Type>
- createUnionType(std::vector<Type*> types) {
- std::vector<Type*> typeVector(types.begin(), types.end());
- return std::unique_ptr<Type>(new TypeImpl(UNION, typeVector));
+ std::unique_ptr<Type> createUnionType() {
+ return std::unique_ptr<Type>(new TypeImpl(UNION));
}
std::string printProtobufMessage(const google::protobuf::Message& message);
@@ -311,59 +378,117 @@ namespace orc {
case proto::Type_Kind_LIST:
case proto::Type_Kind_MAP:
case proto::Type_Kind_UNION: {
- uint64_t size = static_cast<uint64_t>(type.subtypes_size());
- std::vector<Type*> typeList(size);
+ TypeImpl* result = new TypeImpl(static_cast<TypeKind>(type.kind()));
for(int i=0; i < type.subtypes_size(); ++i) {
- typeList[static_cast<uint64_t>(i)] =
- convertType(footer.types(static_cast<int>(type.subtypes(i))),
- footer).release();
+ result->addUnionChild(convertType(footer.types(static_cast<int>
+ (type.subtypes(i))),
+ footer));
}
- return std::unique_ptr<Type>
- (new TypeImpl(static_cast<TypeKind>(type.kind()), typeList));
+ return std::unique_ptr<Type>(result);
}
case proto::Type_Kind_STRUCT: {
+ TypeImpl* result = new TypeImpl(STRUCT);
uint64_t size = static_cast<uint64_t>(type.subtypes_size());
std::vector<Type*> typeList(size);
std::vector<std::string> fieldList(size);
for(int i=0; i < type.subtypes_size(); ++i) {
- typeList[static_cast<uint64_t>(i)] =
- convertType(footer.types(static_cast<int>(type.subtypes(i))),
- footer).release();
- fieldList[static_cast<uint64_t>(i)] = type.fieldnames(i);
+ result->addStructField(type.fieldnames(i),
+ convertType(footer.types(static_cast<int>
+ (type.subtypes(i))),
+ footer));
}
- return std::unique_ptr<Type>
- (new TypeImpl(STRUCT, typeList, fieldList));
+ return std::unique_ptr<Type>(result);
}
default:
throw NotImplementedYet("Unknown type kind");
}
}
- std::string kind2String(TypeKind t) {
- std::string name ;
- switch(static_cast<int64_t>(t)) {
- case BOOLEAN: { name = "BOOLEAN"; break; }
- case BYTE: { name = "TINYINT"; break; }
- case SHORT: { name = "SMALLINT"; break; }
- case INT: { name = "INT"; break; }
- case LONG: { name = "BIGINT"; break; }
- case FLOAT: { name = "FLOAT"; break; }
- case DOUBLE: { name = "DOUBLE"; break; }
- case STRING: { name = "STRING"; break; }
- case BINARY: { name = "BINARY"; break; }
- case TIMESTAMP: { name = "TIMESTAMP"; break; }
- case LIST: { name = "LIST"; break; }
- case MAP: { name = "MAP"; break; }
- case STRUCT: { name = "STRUCT"; break; }
- case UNION: { name = "UNION"; break; }
- case DECIMAL: { name = "DECIMAL"; break; }
- case DATE: { name = "DATE"; break; }
- case VARCHAR: { name = "VARCHAR"; break; }
- case CHAR: { name = "CHAR"; break; }
- default: { name = "UNKNOWN"; break; }
+ /**
+ * Build a clone of the file type, projecting columns from the selected
+ * vector. This routine assumes that the parent of any selected column
+ * is also selected. The column ids are copied from the fileType.
+ * @param fileType the type in the file
+ * @param selected whether each column, indexed by column id, is selected
+ * @return a clone of the fileType filtered by the selection array
+ */
+ std::unique_ptr<Type> buildSelectedType(const Type *fileType,
+ const std::vector<bool>& selected) {
+ if (fileType == nullptr || !selected[fileType->getColumnId()]) {
+ return nullptr;
+ }
+
+ TypeImpl* result;
+ switch (fileType->getKind()) {
+ case BOOLEAN:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ case STRING:
+ case BINARY:
+ case TIMESTAMP:
+ case DATE:
+ result = new TypeImpl(fileType->getKind());
+ break;
+
+ case DECIMAL:
+ result= new TypeImpl(fileType->getKind(),
+ fileType->getPrecision(), fileType->getScale());
+ break;
+
+ case VARCHAR:
+ case CHAR:
+ result = new TypeImpl(fileType->getKind(), fileType->getMaximumLength());
+ break;
+
+ case LIST:
+ result = new TypeImpl(fileType->getKind());
+ result->addChildType(buildSelectedType(fileType->getSubtype(0),
+ selected));
+ break;
+
+ case MAP:
+ result = new TypeImpl(fileType->getKind());
+ result->addChildType(buildSelectedType(fileType->getSubtype(0),
+ selected));
+ result->addChildType(buildSelectedType(fileType->getSubtype(1),
+ selected));
+ break;
+
+ case STRUCT: {
+ result = new TypeImpl(fileType->getKind());
+ for(uint64_t child=0; child < fileType->getSubtypeCount(); ++child) {
+ std::unique_ptr<Type> childType =
+ buildSelectedType(fileType->getSubtype(child), selected);
+ if (childType.get() != nullptr) {
+ result->addStructField(fileType->getFieldName(child),
+ std::move(childType));
+ }
}
- return name ;
+ break;
}
+ case UNION: {
+ result = new TypeImpl(fileType->getKind());
+ for(uint64_t child=0; child < fileType->getSubtypeCount(); ++child) {
+ std::unique_ptr<Type> childType =
+ buildSelectedType(fileType->getSubtype(child), selected);
+ if (childType.get() != nullptr) {
+ result->addUnionChild(std::move(childType));
+ }
+ }
+ break;
+ }
+
+ default:
+ throw NotImplementedYet("Unknown type kind");
+ }
+ result->setIds(fileType->getColumnId(), fileType->getMaximumColumnId());
+ return std::unique_ptr<Type>(result);
+ }
+
}
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/src/TypeImpl.hh
----------------------------------------------------------------------
diff --git a/c++/src/TypeImpl.hh b/c++/src/TypeImpl.hh
index 756375f..e2866e4 100644
--- a/c++/src/TypeImpl.hh
+++ b/c++/src/TypeImpl.hh
@@ -19,7 +19,7 @@
#ifndef TYPE_IMPL_HH
#define TYPE_IMPL_HH
-#include "orc/Vector.hh"
+#include "orc/Type.hh"
#include "Adaptor.hh"
#include "wrap/orc-proto-wrapper.hh"
@@ -30,7 +30,9 @@ namespace orc {
class TypeImpl: public Type {
private:
- int64_t columnId;
+ TypeImpl* parent;
+ mutable int64_t columnId;
+ mutable int64_t maximumColumnId;
TypeKind kind;
std::vector<Type*> subTypes;
std::vector<std::string> fieldNames;
@@ -56,29 +58,17 @@ namespace orc {
TypeImpl(TypeKind kind, uint64_t precision,
uint64_t scale);
- /**
- * Create struct type.
- */
- TypeImpl(TypeKind kind,
- const std::vector<Type*>& types,
- const std::vector<std::string>& fieldNames);
-
- /**
- * Create list, map, and union type.
- */
- TypeImpl(TypeKind kind, const std::vector<Type*>& types);
-
virtual ~TypeImpl();
- int64_t assignIds(int64_t root) override;
+ uint64_t getColumnId() const override;
- int64_t getColumnId() const override;
+ uint64_t getMaximumColumnId() const override;
TypeKind getKind() const override;
uint64_t getSubtypeCount() const override;
- const Type& getSubtype(uint64_t i) const override;
+ const Type* getSubtype(uint64_t i) const override;
const std::string& getFieldName(uint64_t i) const override;
@@ -90,12 +80,51 @@ namespace orc {
std::string toString() const override;
- Type& addStructField(std::unique_ptr<Type> fieldType,
- const std::string& fieldName) override;
+ Type* addStructField(const std::string& fieldName,
+ std::unique_ptr<Type> fieldType) override;
+ Type* addUnionChild(std::unique_ptr<Type> fieldType) override;
+
+ std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size,
+ MemoryPool& memoryPool
+ ) const override;
+
+ /**
+ * Explicitly set the column ids. Only for internal usage.
+ */
+ void setIds(uint64_t columnId, uint64_t maxColumnId);
+
+ /**
+ * Add a child type.
+ */
+ void addChildType(std::unique_ptr<Type> childType);
+
+ private:
+ /**
+ * Assign ids to this node and its children giving this
+ * node rootId.
+ * @param rootId the column id that should be assigned to this node.
+ */
+ uint64_t assignIds(uint64_t rootId) const;
+
+ /**
+ * Ensure that ids are assigned to all of the nodes.
+ */
+ void ensureIdAssigned() const;
};
std::unique_ptr<Type> convertType(const proto::Type& type,
const proto::Footer& footer);
+
+ /**
+ * Build a clone of the file type, projecting columns from the selected
+ * vector. This routine assumes that the parent of any selected column
+ * is also selected. The column ids are copied from the fileType.
+ * @param fileType the type in the file
+ * @param selected whether each column, indexed by column id, is selected
+ * @return a clone of the fileType filtered by the selection array
+ */
+ std::unique_ptr<Type> buildSelectedType(const Type *fileType,
+ const std::vector<bool>& selected);
}
#endif
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/test/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/c++/test/CMakeLists.txt b/c++/test/CMakeLists.txt
index 5a2105a..cd417c8 100644
--- a/c++/test/CMakeLists.txt
+++ b/c++/test/CMakeLists.txt
@@ -30,6 +30,7 @@ add_executable (test-orc
TestDriver.cc
TestInt128.cc
TestRle.cc
+ TestType.cc
)
target_link_libraries (test-orc
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/test/TestColumnPrinter.cc
----------------------------------------------------------------------
diff --git a/c++/test/TestColumnPrinter.cc b/c++/test/TestColumnPrinter.cc
index a2afdb3..8cc3a22 100644
--- a/c++/test/TestColumnPrinter.cc
+++ b/c++/test/TestColumnPrinter.cc
@@ -27,7 +27,7 @@ namespace orc {
std::string line;
std::unique_ptr<Type> type = createPrimitiveType(BOOLEAN);
std::unique_ptr<ColumnPrinter> printer =
- createColumnPrinter(line, *type);
+ createColumnPrinter(line, type.get());
LongVectorBatch batch(1024, *getDefaultPool());
const char *expected[] = {"true", "false", "true"};
batch.numElements = 3;
@@ -60,7 +60,8 @@ namespace orc {
TEST(TestColumnPrinter, LongColumnPrinter) {
std::string line;
std::unique_ptr<Type> type = createPrimitiveType(LONG);
- std::unique_ptr<ColumnPrinter> printer = createColumnPrinter(line, *type);
+ std::unique_ptr<ColumnPrinter> printer =
+ createColumnPrinter(line, type.get());
LongVectorBatch batch(1024, *getDefaultPool());
batch.numElements = 2;
batch.hasNulls = false;
@@ -94,7 +95,7 @@ namespace orc {
std::string line;
std::unique_ptr<Type> type = createPrimitiveType(DOUBLE);
std::unique_ptr<ColumnPrinter> printer =
- createColumnPrinter(line, *type);
+ createColumnPrinter(line, type.get());
DoubleVectorBatch batch(1024, *getDefaultPool());
batch.numElements = 2;
batch.hasNulls = false;
@@ -127,7 +128,8 @@ namespace orc {
TEST(TestColumnPrinter, TimestampColumnPrinter) {
std::string line;
std::unique_ptr<Type> type = createPrimitiveType(TIMESTAMP);
- std::unique_ptr<ColumnPrinter> printer = createColumnPrinter(line, *type);
+ std::unique_ptr<ColumnPrinter> printer =
+ createColumnPrinter(line, type.get());
TimestampVectorBatch batch(1024, *getDefaultPool());
batch.numElements = 12;
batch.hasNulls = false;
@@ -193,7 +195,7 @@ namespace orc {
std::string line;
std::unique_ptr<Type> type = createPrimitiveType(DATE);
std::unique_ptr<ColumnPrinter> printer =
- createColumnPrinter(line, *type);
+ createColumnPrinter(line, type.get());
LongVectorBatch batch(1024, *getDefaultPool());
batch.numElements = 10;
batch.hasNulls = false;
@@ -243,7 +245,7 @@ namespace orc {
std::string line;
std::unique_ptr<Type> type = createDecimalType(16, 5);
std::unique_ptr<ColumnPrinter> printer =
- createColumnPrinter(line, *type);
+ createColumnPrinter(line, type.get());
Decimal64VectorBatch batch(1024, *getDefaultPool());
batch.numElements = 10;
batch.hasNulls = false;
@@ -294,7 +296,7 @@ namespace orc {
std::string line;
std::unique_ptr<Type> type = createDecimalType(30, 5);
std::unique_ptr<ColumnPrinter> printer =
- createColumnPrinter(line, *type);
+ createColumnPrinter(line, type.get());
Decimal128VectorBatch batch(1024, *getDefaultPool());
batch.numElements = 10;
batch.hasNulls = false;
@@ -345,7 +347,7 @@ namespace orc {
std::string line;
std::unique_ptr<Type> type = createPrimitiveType(STRING);
std::unique_ptr<ColumnPrinter> printer =
- createColumnPrinter(line, *type);
+ createColumnPrinter(line, type.get());
StringVectorBatch batch(1024, *getDefaultPool());
const char *blob= "thisisatest\b\f\n\r\t\\\"'";
batch.numElements = 5;
@@ -388,7 +390,7 @@ namespace orc {
std::string line;
std::unique_ptr<Type> type = createPrimitiveType(BINARY);
std::unique_ptr<ColumnPrinter> printer =
- createColumnPrinter(line, *type);
+ createColumnPrinter(line, type.get());
StringVectorBatch batch(1024, *getDefaultPool());
char blob[45];
for(size_t i=0; i < sizeof(blob); ++i) {
@@ -438,7 +440,7 @@ namespace orc {
std::string line;
std::unique_ptr<Type> type = createListType(createPrimitiveType(LONG));
std::unique_ptr<ColumnPrinter> printer =
- createColumnPrinter(line, *type);
+ createColumnPrinter(line, type.get());
ListVectorBatch batch(1024, *getDefaultPool());
LongVectorBatch* longBatch = new LongVectorBatch(1024, *getDefaultPool());
batch.elements = std::unique_ptr<ColumnVectorBatch>(longBatch);
@@ -490,7 +492,7 @@ namespace orc {
std::unique_ptr<Type> type = createMapType(createPrimitiveType(LONG),
createPrimitiveType(LONG));
std::unique_ptr<ColumnPrinter> printer =
- createColumnPrinter(line, *type);
+ createColumnPrinter(line, type.get());
MapVectorBatch batch(1024, *getDefaultPool());
LongVectorBatch* keyBatch = new LongVectorBatch(1024, *getDefaultPool());
LongVectorBatch* valueBatch = new LongVectorBatch(1024, *getDefaultPool());
@@ -541,15 +543,11 @@ namespace orc {
TEST(TestColumnPrinter, StructColumnPrinter) {
std::string line;
- std::vector<std::string> fieldNames;
- std::vector<Type*> subtypes;
- fieldNames.push_back("first");
- fieldNames.push_back("second");
- subtypes.push_back(createPrimitiveType(LONG).release());
- subtypes.push_back(createPrimitiveType(LONG).release());
- std::unique_ptr<Type> type = createStructType(subtypes, fieldNames);
+ std::unique_ptr<Type> type = createStructType();
+ type->addStructField("first", createPrimitiveType(LONG));
+ type->addStructField("second", createPrimitiveType(LONG));
std::unique_ptr<ColumnPrinter> printer =
- createColumnPrinter(line, *type);
+ createColumnPrinter(line, type.get());
StructVectorBatch batch(1024, *getDefaultPool());
LongVectorBatch* firstBatch = new LongVectorBatch(1024, *getDefaultPool());
LongVectorBatch* secondBatch =