You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2016/01/06 21:56:31 UTC

[1/2] orc git commit: Fixed ORC-29: Enable ColumnPrinter to print only specified columns. (asandryh and omalley)

Repository: orc
Updated Branches:
  refs/heads/master b39302f59 -> 3945f0663


http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/test/TestColumnReader.cc
----------------------------------------------------------------------
diff --git a/c++/test/TestColumnReader.cc b/c++/test/TestColumnReader.cc
index 4b1b4b1..075a069 100644
--- a/c++/test/TestColumnReader.cc
+++ b/c++/test/TestColumnReader.cc
@@ -37,14 +37,14 @@ namespace orc {
 class MockStripeStreams: public StripeStreams {
 public:
   ~MockStripeStreams();
-  std::unique_ptr<SeekableInputStream> getStream(int64_t columnId,
+  std::unique_ptr<SeekableInputStream> getStream(uint64_t columnId,
                                                  proto::Stream_Kind kind,
                                                  bool stream) const override;
   MOCK_CONST_METHOD0(getReaderOptions, const ReaderOptions&());
   MOCK_CONST_METHOD0(getSelectedColumns, const std::vector<bool>());
-  MOCK_CONST_METHOD1(getEncoding, proto::ColumnEncoding (int64_t));
+  MOCK_CONST_METHOD1(getEncoding, proto::ColumnEncoding (uint64_t));
   MOCK_CONST_METHOD3(getStreamProxy, SeekableInputStream*
-                     (int64_t, proto::Stream_Kind, bool));
+                     (uint64_t, proto::Stream_Kind, bool));
   MemoryPool& getMemoryPool() const {
     return *getDefaultPool();
   }
@@ -60,7 +60,7 @@ MockStripeStreams::~MockStripeStreams() {
 }
 
 std::unique_ptr<SeekableInputStream>
-MockStripeStreams::getStream(int64_t columnId,
+MockStripeStreams::getStream(uint64_t columnId,
                              proto::Stream_Kind kind,
                              bool shouldStream) const {
   return std::unique_ptr < SeekableInputStream >
@@ -98,8 +98,7 @@ TEST(TestColumnReader, testBooleanWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(BOOLEAN), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createPrimitiveType(BOOLEAN));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
   LongVectorBatch *longBatch = new LongVectorBatch(1024, *getDefaultPool());
@@ -152,8 +151,7 @@ TEST(TestColumnReader, testBooleanSkipsWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(BOOLEAN), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createPrimitiveType(BOOLEAN));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
   LongVectorBatch *longBatch = new LongVectorBatch(1024, *getDefaultPool());
@@ -217,8 +215,7 @@ TEST(TestColumnReader, testByteWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(BYTE), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createPrimitiveType(BYTE));
 
   std::unique_ptr<ColumnReader> reader =
       buildReader(*rowType, streams);
@@ -281,8 +278,7 @@ TEST(TestColumnReader, testByteSkipsWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(BYTE), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createPrimitiveType(BYTE));
 
   std::unique_ptr<ColumnReader> reader =
       buildReader(*rowType, streams);
@@ -313,7 +309,7 @@ TEST(TestColumnReader, testIntegerWithNulls) {
 
   // set getSelectedColumns()
   std::vector<bool> selectedColumns(2, true);
-  
+
   EXPECT_CALL(streams, getSelectedColumns())
       .WillRepeatedly(testing::Return(selectedColumns));
 
@@ -337,8 +333,7 @@ TEST(TestColumnReader, testIntegerWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(INT), "myInt" );
-  rowType->assignIds(0);
+  rowType->addStructField("myInt", createPrimitiveType(INT));
 
   std::unique_ptr<ColumnReader> reader =
       buildReader(*rowType, streams);
@@ -403,8 +398,7 @@ TEST(TestColumnReader, testDictionaryWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(STRING), "myString");
-  rowType->assignIds(0);
+  rowType->addStructField("myString", createPrimitiveType(STRING));
 
   std::unique_ptr<ColumnReader> reader =
       buildReader(*rowType, streams);
@@ -503,10 +497,9 @@ TEST(TestColumnReader, testVarcharDictionaryWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(VARCHAR), "col0");
-  rowType->addStructField(createPrimitiveType(CHAR), "col1");
-  rowType->addStructField(createPrimitiveType(STRING), "col2");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createPrimitiveType(VARCHAR))
+    ->addStructField("col1", createPrimitiveType(CHAR))
+    ->addStructField("col2", createPrimitiveType(STRING));
 
   std::unique_ptr<ColumnReader> reader =
       buildReader(*rowType, streams);
@@ -576,11 +569,14 @@ TEST(TestColumnReader, testSubstructsWithNulls) {
                                       (buffer4, ARRAY_SIZE(buffer4))));
 
   // create the row type
+  std::unique_ptr<Type> innerType = createStructType();
+  innerType->addStructField("col2", createPrimitiveType(LONG));
+
+  std::unique_ptr<Type> middleType = createStructType();
+  middleType->addStructField("col1", std::move(innerType));
+
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createStructType(), "col0")
-    .addStructField(createStructType(), "col1")
-    .addStructField(createPrimitiveType(LONG), "col2");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", std::move(middleType));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -684,9 +680,8 @@ TEST(TestColumnReader, testSkipWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(INT), "myInt");
-  rowType->addStructField(createPrimitiveType(STRING), "myString");
-  rowType->assignIds(0);
+  rowType->addStructField("myInt", createPrimitiveType(INT));
+  rowType->addStructField("myString", createPrimitiveType(STRING));
 
   std::unique_ptr<ColumnReader> reader =
       buildReader(*rowType, streams);
@@ -766,8 +761,7 @@ TEST(TestColumnReader, testBinaryDirect) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(BINARY), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createPrimitiveType(BINARY));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -829,8 +823,7 @@ TEST(TestColumnReader, testBinaryDirectWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(BINARY), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createPrimitiveType(BINARY));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -881,7 +874,7 @@ TEST(TestColumnReader, testShortBlobError) {
   EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_DATA, true))
       .WillRepeatedly(testing::Return(new SeekableArrayInputStream
                                       (blob, ARRAY_SIZE(blob))));
-  
+
   const unsigned char buffer1[] = {0x61, 0x00, 0x02};
   EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_LENGTH, true))
       .WillRepeatedly(testing::Return(new SeekableArrayInputStream
@@ -889,8 +882,7 @@ TEST(TestColumnReader, testShortBlobError) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(STRING), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createPrimitiveType(STRING));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -939,8 +931,7 @@ TEST(TestColumnReader, testStringDirectShortBuffer) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(STRING), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createPrimitiveType(STRING));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -1002,8 +993,7 @@ TEST(TestColumnReader, testStringDirectShortBufferWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(STRING), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createPrimitiveType(STRING));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -1073,8 +1063,7 @@ TEST(TestColumnReader, testStringDirectNullAcrossWindow) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(STRING), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createPrimitiveType(STRING));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -1132,7 +1121,7 @@ TEST(TestColumnReader, testStringDirectSkip) {
       (blob, BLOB_SIZE, 200)));
 
   // the stream of 0 to 1199
-  const unsigned char buffer1[] = 
+  const unsigned char buffer1[] =
     { 0x7f, 0x01, 0x00,
       0x7f, 0x01, 0x82, 0x01,
       0x7f, 0x01, 0x84, 0x02,
@@ -1149,8 +1138,7 @@ TEST(TestColumnReader, testStringDirectSkip) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(STRING), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createPrimitiveType(STRING));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -1250,8 +1238,7 @@ TEST(TestColumnReader, testStringDirectSkipWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(STRING), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createPrimitiveType(STRING));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -1338,8 +1325,7 @@ TEST(TestColumnReader, testList) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createListType(createPrimitiveType(LONG)), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createListType(createPrimitiveType(LONG)));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -1371,10 +1357,11 @@ TEST(TestColumnReader, testListPropagateNulls) {
   EXPECT_CALL(streams, getSelectedColumns())
       .WillRepeatedly(testing::Return(selectedColumns));
 
+  std::unique_ptr<Type> innerType = createStructType();
+  innerType->addStructField("col0_0",
+                            createListType(createPrimitiveType(LONG)));
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createStructType(), "col0")
-    .addStructField(createListType(createPrimitiveType(LONG)), "col0_0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", std::move(innerType));
 
   // set getEncoding
   proto::ColumnEncoding directEncoding;
@@ -1495,8 +1482,7 @@ TEST(TestColumnReader, testListWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createListType(createPrimitiveType(LONG)), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createListType(createPrimitiveType(LONG)));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -1658,8 +1644,7 @@ TEST(TestColumnReader, testListSkipWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createListType(createPrimitiveType(LONG)), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createListType(createPrimitiveType(LONG)));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -1759,8 +1744,7 @@ TEST(TestColumnReader, testListSkipWithNullsNoData) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createListType(createPrimitiveType(LONG)), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createListType(createPrimitiveType(LONG)));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -1857,10 +1841,8 @@ TEST(TestColumnReader, testMap) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createMapType(createPrimitiveType(LONG),
-                                        createPrimitiveType(LONG)),
-                          "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createMapType(createPrimitiveType(LONG),
+                                                createPrimitiveType(LONG)));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -1976,10 +1958,8 @@ TEST(TestColumnReader, testMapWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createMapType(createPrimitiveType(LONG),
-                                        createPrimitiveType(LONG)),
-                          "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createMapType(createPrimitiveType(LONG),
+                                                createPrimitiveType(LONG)));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -2186,10 +2166,8 @@ TEST(TestColumnReader, testMapSkipWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createMapType(createPrimitiveType(LONG),
-                                        createPrimitiveType(LONG)),
-                          "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createMapType(createPrimitiveType(LONG),
+                                                createPrimitiveType(LONG)));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -2296,10 +2274,8 @@ TEST(TestColumnReader, testMapSkipWithNullsNoData) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createMapType(createPrimitiveType(LONG),
-                                        createPrimitiveType(LONG)),
-                          "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createMapType(createPrimitiveType(LONG),
+                                                createPrimitiveType(LONG)));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -2384,8 +2360,7 @@ TEST(TestColumnReader, testFloatWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(FLOAT), "myFloat");
-  rowType->assignIds(0);
+  rowType->addStructField("myFloat", createPrimitiveType(FLOAT));
 
   std::unique_ptr<ColumnReader> reader =
       buildReader(*rowType, streams);
@@ -2448,8 +2423,7 @@ TEST(TestColumnReader, testFloatSkipWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(FLOAT), "myFloat");
-  rowType->assignIds(0);
+  rowType->addStructField("myFloat", createPrimitiveType(FLOAT));
 
   std::unique_ptr<ColumnReader> reader =
       buildReader(*rowType, streams);
@@ -2550,8 +2524,7 @@ TEST(TestColumnReader, testDoubleWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(DOUBLE), "myDouble");
-  rowType->assignIds(0);
+  rowType->addStructField("myDouble", createPrimitiveType(DOUBLE));
 
   std::unique_ptr<ColumnReader> reader =
       buildReader(*rowType, streams);
@@ -2615,8 +2588,7 @@ TEST(TestColumnReader, testDoubleSkipWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(DOUBLE), "myDouble");
-  rowType->assignIds(0);
+  rowType->addStructField("myDouble", createPrimitiveType(DOUBLE));
 
   std::unique_ptr<ColumnReader> reader =
       buildReader(*rowType, streams);
@@ -2687,7 +2659,7 @@ TEST(TestColumnReader, testTimestampSkipWithNulls) {
       .WillRepeatedly(testing::Return(new SeekableArrayInputStream
                                       (buffer1, ARRAY_SIZE(buffer1))));
 
-  const unsigned char buffer2[] = { 0xfc, 0xbb, 0xb5, 0xbe, 0x31, 0xa1, 0xee, 
+  const unsigned char buffer2[] = { 0xfc, 0xbb, 0xb5, 0xbe, 0x31, 0xa1, 0xee,
                                     0xe2, 0x10, 0xf8, 0x92, 0xee, 0xf, 0x92,
                                     0xa0, 0xd4, 0x30 };
   EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_DATA, true))
@@ -2701,8 +2673,7 @@ TEST(TestColumnReader, testTimestampSkipWithNulls) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(TIMESTAMP), "myTimestamp");
-  rowType->assignIds(0);
+  rowType->addStructField("myTimestamp", createPrimitiveType(TIMESTAMP));
 
   std::unique_ptr<ColumnReader> reader =
       buildReader(*rowType, streams);
@@ -2812,8 +2783,7 @@ TEST(TestColumnReader, testTimestamp) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createPrimitiveType(TIMESTAMP), "myTimestamp");
-  rowType->assignIds(0);
+  rowType->addStructField("myTimestamp", createPrimitiveType(TIMESTAMP));
 
   std::unique_ptr<ColumnReader> reader =
       buildReader(*rowType, streams);
@@ -2902,8 +2872,7 @@ TEST(DecimalColumnReader, testDecimal64) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createDecimalType(12, 2), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createDecimalType(12, 2));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -2983,8 +2952,7 @@ TEST(DecimalColumnReader, testDecimal64Skip) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createDecimalType(12, 10), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createDecimalType(12, 10));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -3059,8 +3027,7 @@ TEST(DecimalColumnReader, testDecimal128) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createDecimalType(32, 2), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createDecimalType(32, 2));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -3153,8 +3120,7 @@ TEST(DecimalColumnReader, testDecimal128Skip) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createDecimalType(38, 37), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createDecimalType(38, 37));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -3241,8 +3207,7 @@ TEST(DecimalColumnReader, testDecimalHive11) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createDecimalType(0, 0), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createDecimalType(0, 0));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -3341,8 +3306,7 @@ TEST(DecimalColumnReader, testDecimalHive11Skip) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createDecimalType(0, 0), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createDecimalType(0, 0));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -3427,8 +3391,7 @@ TEST(DecimalColumnReader, testDecimalHive11ScaleUp) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createDecimalType(0, 0), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createDecimalType(0, 0));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -3516,8 +3479,7 @@ TEST(DecimalColumnReader, testDecimalHive11ScaleDown) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createDecimalType(0, 0), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createDecimalType(0, 0));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -3587,8 +3549,7 @@ TEST(DecimalColumnReader, testDecimalHive11OverflowException) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createDecimalType(0, 0), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createDecimalType(0, 0));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -3646,8 +3607,7 @@ TEST(DecimalColumnReader, testDecimalHive11OverflowExceptionNull) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createDecimalType(0, 0), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createDecimalType(0, 0));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -3712,8 +3672,7 @@ TEST(DecimalColumnReader, testDecimalHive11OverflowNull) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createDecimalType(0, 0), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createDecimalType(0, 0));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -3795,8 +3754,7 @@ TEST(DecimalColumnReader, testDecimalHive11BigBatches) {
 
   // create the row type
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createDecimalType(0, 0), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", createDecimalType(0, 0));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -3892,12 +3850,11 @@ TEST(TestColumnReader, testUnion) {
                                       (buffer3, ARRAY_SIZE(buffer3))));
 
   // create the row type
-  std::vector<Type*> childrenTypes;
-  childrenTypes.push_back(createPrimitiveType(LONG).release());
-  childrenTypes.push_back(createPrimitiveType(INT).release());
+  std::unique_ptr<Type> unionType = createUnionType();
+  unionType->addUnionChild(createPrimitiveType(LONG));
+  unionType->addUnionChild(createPrimitiveType(INT));
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createUnionType(childrenTypes), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", std::move(unionType));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -4043,12 +4000,11 @@ TEST(TestColumnReader, testUnionWithNulls) {
                                       (buffer4, ARRAY_SIZE(buffer4))));
 
   // create the row type
-  std::vector<Type*> childrenTypes;
-  childrenTypes.push_back(createPrimitiveType(LONG).release());
-  childrenTypes.push_back(createPrimitiveType(INT).release());
+  std::unique_ptr<Type> unionType = createUnionType();
+  unionType->addUnionChild(createPrimitiveType(LONG));
+  unionType->addUnionChild(createPrimitiveType(INT));
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createUnionType(childrenTypes), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", std::move(unionType));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -4138,12 +4094,11 @@ TEST(TestColumnReader, testUnionSkips) {
                                       (buffer3, ARRAY_SIZE(buffer3))));
 
   // create the row type
-  std::vector<Type*> childrenTypes;
-  childrenTypes.push_back(createPrimitiveType(LONG).release());
-  childrenTypes.push_back(createPrimitiveType(INT).release());
+  std::unique_ptr<Type> unionType = createUnionType();
+  unionType->addUnionChild(createPrimitiveType(LONG));
+  unionType->addUnionChild(createPrimitiveType(INT));
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createUnionType(childrenTypes), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", std::move(unionType));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -4242,12 +4197,11 @@ TEST(TestColumnReader, testUnionLongSkip) {
                                       (buffer2, ARRAY_SIZE(buffer2))));
 
   // create the row type
-  std::vector<Type*> childrenTypes;
-  childrenTypes.push_back(createPrimitiveType(LONG).release());
-  childrenTypes.push_back(createPrimitiveType(INT).release());
+  std::unique_ptr<Type> unionType = createUnionType();
+  unionType->addUnionChild(createPrimitiveType(LONG));
+  unionType->addUnionChild(createPrimitiveType(INT));
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createUnionType(childrenTypes), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", std::move(unionType));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -4326,7 +4280,7 @@ TEST(TestColumnReader, testUnionWithManyVariants) {
   // for variant in range(0, 130):
   //   [variant & 0x3f, (variant & 0x3f) + 1, (variant & 0x3f) + 2]
   unsigned char buffer[3 * 130];
-  for(int variant = 0; variant < 130; ++variant) {
+  for(uint variant = 0; variant < 130; ++variant) {
     buffer[3 * variant] = 0x00;
     buffer[3 * variant + 1] = 0x01;
     buffer[3 * variant + 2] = static_cast<unsigned char>((variant * 2) & 0x7f);
@@ -4337,13 +4291,12 @@ TEST(TestColumnReader, testUnionWithManyVariants) {
   }
 
   // create the row type
-  std::vector<Type*> childrenTypes;
+  std::unique_ptr<Type> unionType = createUnionType();
   for(size_t variant=0; variant < 130; ++variant) {
-    childrenTypes.push_back(createPrimitiveType(LONG).release());
+    unionType->addUnionChild(createPrimitiveType(LONG));
   }
   std::unique_ptr<Type> rowType = createStructType();
-  rowType->addStructField(createUnionType(childrenTypes), "col0");
-  rowType->assignIds(0);
+  rowType->addStructField("col0", std::move(unionType));
 
   std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams);
 
@@ -4366,7 +4319,7 @@ TEST(TestColumnReader, testUnionWithManyVariants) {
   for (size_t i = 0; i < batch.numElements; ++i) {
     EXPECT_EQ(i, unions->tags[i]);
     EXPECT_EQ(0, unions->offsets[i]);
-    EXPECT_EQ(i & 0x3f, 
+    EXPECT_EQ(i & 0x3f,
               dynamic_cast<LongVectorBatch*>(unions->children[unions->tags[i]])
               ->data[unions->offsets[i]]);
   }

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/test/TestType.cc
----------------------------------------------------------------------
diff --git a/c++/test/TestType.cc b/c++/test/TestType.cc
new file mode 100644
index 0000000..3c595d0
--- /dev/null
+++ b/c++/test/TestType.cc
@@ -0,0 +1,277 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Adaptor.hh"
+#include "OrcTest.hh"
+#include "orc/Type.hh"
+#include "wrap/gtest-wrapper.h"
+
+#include "TypeImpl.hh"
+
+namespace orc {
+
+  uint64_t checkIds(const Type* type, uint64_t next) {
+    EXPECT_EQ(next, type->getColumnId())
+      << "Wrong id for " << type->toString();
+    next += 1;
+    for(uint64_t child = 0; child < type->getSubtypeCount(); ++child) {
+      next = checkIds(type->getSubtype(child), next) + 1;
+    }
+    EXPECT_EQ(next - 1, type->getMaximumColumnId())
+      << "Wrong maximum id for " << type->toString();
+    return type->getMaximumColumnId();
+  }
+
+  TEST(TestType, simple) {
+    std::unique_ptr<Type> myType = createStructType();
+    myType->addStructField("myInt", createPrimitiveType(INT));
+    myType->addStructField("myString", createPrimitiveType(STRING));
+    myType->addStructField("myFloat", createPrimitiveType(FLOAT));
+    myType->addStructField("list", createListType(createPrimitiveType(LONG)));
+    myType->addStructField("bool", createPrimitiveType(BOOLEAN));
+
+    EXPECT_EQ(0, myType->getColumnId());
+    EXPECT_EQ(6, myType->getMaximumColumnId());
+    EXPECT_EQ(5, myType->getSubtypeCount());
+    EXPECT_EQ(STRUCT, myType->getKind());
+    EXPECT_EQ("struct<myInt:int,myString:string,myFloat:float,"
+              "list:array<bigint>,bool:boolean>",
+              myType->toString());
+    checkIds(myType.get(), 0);
+
+    const Type* child = myType->getSubtype(0);
+    EXPECT_EQ(1, child->getColumnId());
+    EXPECT_EQ(1, child->getMaximumColumnId());
+    EXPECT_EQ(INT, child->getKind());
+    EXPECT_EQ(0, child->getSubtypeCount());
+
+    child = myType->getSubtype(1);
+    EXPECT_EQ(2, child->getColumnId());
+    EXPECT_EQ(2, child->getMaximumColumnId());
+    EXPECT_EQ(STRING, child->getKind());
+    EXPECT_EQ(0, child->getSubtypeCount());
+
+    child = myType->getSubtype(2);
+    EXPECT_EQ(3, child->getColumnId());
+    EXPECT_EQ(3, child->getMaximumColumnId());
+    EXPECT_EQ(FLOAT, child->getKind());
+    EXPECT_EQ(0, child->getSubtypeCount());
+
+    child = myType->getSubtype(3);
+    EXPECT_EQ(4, child->getColumnId());
+    EXPECT_EQ(5, child->getMaximumColumnId());
+    EXPECT_EQ(LIST, child->getKind());
+    EXPECT_EQ(1, child->getSubtypeCount());
+    EXPECT_EQ("array<bigint>", child->toString());
+
+    child = child->getSubtype(0);
+    EXPECT_EQ(5, child->getColumnId());
+    EXPECT_EQ(5, child->getMaximumColumnId());
+    EXPECT_EQ(LONG, child->getKind());
+    EXPECT_EQ(0, child->getSubtypeCount());
+
+    child = myType->getSubtype(4);
+    EXPECT_EQ(6, child->getColumnId());
+    EXPECT_EQ(6, child->getMaximumColumnId());
+    EXPECT_EQ(BOOLEAN, child->getKind());
+    EXPECT_EQ(0, child->getSubtypeCount());
+  }
+
+  TEST(TestType, nested) {
+    std::unique_ptr<Type> myType = createStructType();
+    {
+      std::unique_ptr<Type> innerStruct = createStructType();
+      innerStruct->addStructField("col0", createPrimitiveType(INT));
+
+      std::unique_ptr<Type> unionType = createUnionType();
+      unionType->addUnionChild(std::move(innerStruct));
+      unionType->addUnionChild(createPrimitiveType(STRING));
+
+      myType->addStructField("myList",
+                             createListType
+                             (createMapType(createPrimitiveType(STRING),
+                                            std::move(unionType))));
+    }
+
+    // get a pointer to the bottom type
+    const Type* listType = myType->getSubtype(0);
+    const Type* mapType = listType->getSubtype(0);
+    const Type* unionType = mapType->getSubtype(1);
+    const Type* structType = unionType->getSubtype(0);
+    const Type* intType = structType->getSubtype(0);
+
+    // calculate the id of the child to make sure that we climb correctly
+    EXPECT_EQ(6, intType->getColumnId());
+    EXPECT_EQ(6, intType->getMaximumColumnId());
+    EXPECT_EQ("int", intType->toString());
+
+    checkIds(myType.get(), 0);
+
+    EXPECT_EQ(5, structType->getColumnId());
+    EXPECT_EQ(6, structType->getMaximumColumnId());
+    EXPECT_EQ("struct<col0:int>", structType->toString());
+
+    EXPECT_EQ(4, unionType->getColumnId());
+    EXPECT_EQ(7, unionType->getMaximumColumnId());
+    EXPECT_EQ("uniontype<struct<col0:int>,string>", unionType->toString());
+
+    EXPECT_EQ(2, mapType->getColumnId());
+    EXPECT_EQ(7, mapType->getMaximumColumnId());
+    EXPECT_EQ("map<string,uniontype<struct<col0:int>,string>>",
+              mapType->toString());
+
+    EXPECT_EQ(1, listType->getColumnId());
+    EXPECT_EQ(7, listType->getMaximumColumnId());
+    EXPECT_EQ("array<map<string,uniontype<struct<col0:int>,string>>>",
+              listType->toString());
+
+    EXPECT_EQ(0, myType->getColumnId());
+    EXPECT_EQ(7, myType->getMaximumColumnId());
+    EXPECT_EQ("struct<myList:array<map<string,uniontype<struct<col0:int>,"
+              "string>>>>",
+              myType->toString());
+  }
+
+  TEST(TestType, selectedType) {
+    std::unique_ptr<Type> myType = createStructType();
+    myType->addStructField("col0", createPrimitiveType(BYTE));
+    myType->addStructField("col1", createPrimitiveType(SHORT));
+    myType->addStructField("col2",
+                           createListType(createPrimitiveType(STRING)));
+    myType->addStructField("col3",
+                           createMapType(createPrimitiveType(FLOAT),
+                                         createPrimitiveType(DOUBLE)));
+    std::unique_ptr<Type> unionType = createUnionType();
+    unionType->addUnionChild(createCharType(CHAR, 100));
+    unionType->addUnionChild(createCharType(VARCHAR, 200));
+    myType->addStructField("col4", std::move(unionType));
+    myType->addStructField("col5", createPrimitiveType(INT));
+    myType->addStructField("col6", createPrimitiveType(LONG));
+    myType->addStructField("col7", createDecimalType(10, 2));
+
+    checkIds(myType.get(), 0);
+    EXPECT_EQ("struct<col0:tinyint,col1:smallint,col2:array<string>,"
+              "col3:map<float,double>,col4:uniontype<char(100),varchar(200)>,"
+              "col5:int,col6:bigint,col7:decimal(10,2)>", myType->toString());
+    EXPECT_EQ(0, myType->getColumnId());
+    EXPECT_EQ(13, myType->getMaximumColumnId());
+
+    std::vector<bool> selected(14);
+    selected[0] = true;
+    selected[2] = true;
+    std::unique_ptr<Type> cutType = buildSelectedType(myType.get(),
+                                                      selected);
+    EXPECT_EQ("struct<col1:smallint>", cutType->toString());
+    EXPECT_EQ(0, cutType->getColumnId());
+    EXPECT_EQ(13, cutType->getMaximumColumnId());
+    EXPECT_EQ(2, cutType->getSubtype(0)->getColumnId());
+
+    selected.assign(14, true);
+    cutType = buildSelectedType(myType.get(), selected);
+    EXPECT_EQ("struct<col0:tinyint,col1:smallint,col2:array<string>,"
+              "col3:map<float,double>,col4:uniontype<char(100),varchar(200)>,"
+              "col5:int,col6:bigint,col7:decimal(10,2)>", cutType->toString());
+    EXPECT_EQ(0, cutType->getColumnId());
+    EXPECT_EQ(13, cutType->getMaximumColumnId());
+
+    selected.assign(14, false);
+    selected[0] = true;
+    selected[8] = true;
+    selected[10] = true;
+    cutType = buildSelectedType(myType.get(), selected);
+    EXPECT_EQ("struct<col4:uniontype<varchar(200)>>", cutType->toString());
+    EXPECT_EQ(0, cutType->getColumnId());
+    EXPECT_EQ(13, cutType->getMaximumColumnId());
+    EXPECT_EQ(8, cutType->getSubtype(0)->getColumnId());
+    EXPECT_EQ(10, cutType->getSubtype(0)->getMaximumColumnId());
+    EXPECT_EQ(10, cutType->getSubtype(0)->getSubtype(0)->getColumnId());
+
+    selected.assign(14, false);
+    selected[0] = true;
+    selected[8] = true;
+    cutType = buildSelectedType(myType.get(), selected);
+    EXPECT_EQ("struct<col4:uniontype<>>", cutType->toString());
+
+    selected.assign(14, false);
+    selected[0] = true;
+    cutType = buildSelectedType(myType.get(), selected);
+    EXPECT_EQ("struct<>", cutType->toString());
+
+    selected.assign(14, false);
+    selected[0] = true;
+    selected[3] = true;
+    selected[4] = true;
+    cutType = buildSelectedType(myType.get(), selected);
+    EXPECT_EQ("struct<col2:array<string>>", cutType->toString());
+
+    selected.assign(14, false);
+    selected[0] = true;
+    selected[3] = true;
+    cutType = buildSelectedType(myType.get(), selected);
+    EXPECT_EQ("struct<col2:array<void>>", cutType->toString());
+    EXPECT_EQ(3, cutType->getSubtype(0)->getColumnId());
+    EXPECT_EQ(4, cutType->getSubtype(0)->getMaximumColumnId());
+
+    selected.assign(14, false);
+    selected[0] = true;
+    selected[5] = true;
+    selected[6] = true;
+    selected[7] = true;
+    cutType = buildSelectedType(myType.get(), selected);
+    EXPECT_EQ("struct<col3:map<float,double>>", cutType->toString());
+    EXPECT_EQ(5, cutType->getSubtype(0)->getColumnId());
+    EXPECT_EQ(7, cutType->getSubtype(0)->getMaximumColumnId());
+
+    selected.assign(14, false);
+    selected[0] = true;
+    selected[5] = true;
+    cutType = buildSelectedType(myType.get(), selected);
+    EXPECT_EQ("struct<col3:map<void,void>>", cutType->toString());
+    EXPECT_EQ(5, cutType->getSubtype(0)->getColumnId());
+    EXPECT_EQ(7, cutType->getSubtype(0)->getMaximumColumnId());
+
+    selected.assign(14, false);
+    selected[0] = true;
+    selected[5] = true;
+    selected[6] = true;
+    cutType = buildSelectedType(myType.get(), selected);
+    EXPECT_EQ("struct<col3:map<float,void>>", cutType->toString());
+    EXPECT_EQ(5, cutType->getSubtype(0)->getColumnId());
+    EXPECT_EQ(7, cutType->getSubtype(0)->getMaximumColumnId());
+
+    selected.assign(14, false);
+    selected[0] = true;
+    selected[5] = true;
+    selected[7] = true;
+    cutType = buildSelectedType(myType.get(), selected);
+    EXPECT_EQ("struct<col3:map<void,double>>", cutType->toString());
+    EXPECT_EQ(5, cutType->getSubtype(0)->getColumnId());
+    EXPECT_EQ(7, cutType->getSubtype(0)->getMaximumColumnId());
+
+    selected.assign(14, false);
+    selected[0] = true;
+    selected[1] = true;
+    selected[13] = true;
+    cutType = buildSelectedType(myType.get(), selected);
+    EXPECT_EQ("struct<col0:tinyint,col7:decimal(10,2)>", cutType->toString());
+    EXPECT_EQ(1, cutType->getSubtype(0)->getColumnId());
+    EXPECT_EQ(1, cutType->getSubtype(0)->getMaximumColumnId());
+    EXPECT_EQ(13, cutType->getSubtype(1)->getColumnId());
+    EXPECT_EQ(13, cutType->getSubtype(1)->getMaximumColumnId());
+  }
+}

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/tools/src/FileContents.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileContents.cc b/tools/src/FileContents.cc
index 694fea3..ff7eb72 100644
--- a/tools/src/FileContents.cc
+++ b/tools/src/FileContents.cc
@@ -32,7 +32,7 @@ void printContents(const char* filename, const orc::ReaderOptions opts) {
   std::unique_ptr<orc::ColumnVectorBatch> batch = reader->createRowBatch(1000);
   std::string line;
   std::unique_ptr<orc::ColumnPrinter> printer =
-    createColumnPrinter(line, reader->getType());
+    createColumnPrinter(line, &reader->getSelectedType());
 
   while (reader->next(*batch)) {
     printer->reset(*batch);
@@ -48,12 +48,36 @@ void printContents(const char* filename, const orc::ReaderOptions opts) {
 
 int main(int argc, char* argv[]) {
   if (argc < 2) {
-    std::cout << "Usage: file-contents <filename>\n";
+    std::cout << "Usage: file-contents <filename> [--columns=1,2,...]\n"
+              << "Print contents of <filename>.\n"
+              << "If columns are specified, only these top-level (logical) columns are printed.\n" ;
     return 1;
   }
   try {
+    const std::string COLUMNS_PREFIX = "--columns=";
+    std::list<uint64_t> cols;
+    char* filename = ORC_NULLPTR;
+
+    // Read command-line options
+    char *param, *value;
+    for (int i = 1; i < argc; i++) {
+      if ( (param = std::strstr(argv[i], COLUMNS_PREFIX.c_str())) ) {
+        value = std::strtok(param+COLUMNS_PREFIX.length(), "," );
+        while (value) {
+          cols.push_back(static_cast<uint64_t>(std::atoi(value)));
+          value = std::strtok(nullptr, "," );
+        }
+      } else {
+        filename = argv[i];
+      }
+    }
     orc::ReaderOptions opts;
-    printContents(argv[1], opts);
+    if (cols.size() > 0) {
+      opts.include(cols);
+    }
+    if (filename != ORC_NULLPTR) {
+      printContents(filename, opts);
+    }
   } catch (std::exception& ex) {
     std::cerr << "Caught exception: " << ex.what() << "\n";
     return 1;

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/tools/src/FileMemory.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileMemory.cc b/tools/src/FileMemory.cc
index ba8459a..2bfd21a 100644
--- a/tools/src/FileMemory.cc
+++ b/tools/src/FileMemory.cc
@@ -70,7 +70,7 @@ int main(int argc, char* argv[]) {
   char* filename = ORC_NULLPTR;
 
   // Default parameters
-  std::list<int64_t> cols;
+  std::list<uint64_t> cols;
   uint32_t batchSize = 1000;
 
   // Read command-line options
@@ -79,7 +79,7 @@ int main(int argc, char* argv[]) {
     if ( (param = std::strstr(argv[i], COLUMNS_PREFIX.c_str())) ) {
       value = std::strtok(param+COLUMNS_PREFIX.length(), "," );
       while (value) {
-        cols.push_back(std::atoi(value));
+        cols.push_back(static_cast<uint64_t>(std::atoi(value)));
         value = std::strtok(nullptr, "," );
       }
     } else if ( (param=strstr(argv[i], BATCH_PREFIX.c_str())) ) {

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/tools/src/FileScan.cc
----------------------------------------------------------------------
diff --git a/tools/src/FileScan.cc b/tools/src/FileScan.cc
index cbb5980..4683847 100644
--- a/tools/src/FileScan.cc
+++ b/tools/src/FileScan.cc
@@ -31,10 +31,6 @@ int main(int argc, char* argv[]) {
   }
 
   orc::ReaderOptions opts;
-  std::list<int64_t> cols;
-  cols.push_back(0);
-  opts.include(cols);
-
   std::unique_ptr<orc::Reader> reader;
   try{
     reader = orc::createReader(orc::readLocalFile(std::string(argv[1])), opts);

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/tools/test/TestReader.cc
----------------------------------------------------------------------
diff --git a/tools/test/TestReader.cc b/tools/test/TestReader.cc
index 0d337ca..7862eff 100644
--- a/tools/test/TestReader.cc
+++ b/tools/test/TestReader.cc
@@ -140,7 +140,7 @@ namespace orc {
     std::unique_ptr<ColumnVectorBatch> batch = reader->createRowBatch(1024);
     std::string line;
     std::unique_ptr<orc::ColumnPrinter> printer =
-      orc::createColumnPrinter(line, reader->getType());
+      orc::createColumnPrinter(line, &reader->getSelectedType());
     GzipTextReader expected(getJsonFilename());
     std::string expectedLine;
     while (reader->next(*batch)) {
@@ -511,8 +511,8 @@ INSTANTIATE_TEST_CASE_P(TestReader1900, MatchTest,
 
   TEST(Reader, columnSelectionTest) {
     ReaderOptions opts;
-    std::list<int64_t> includes;
-    for(int i=1; i < 10; i += 2) {
+    std::list<uint64_t> includes;
+    for(uint64_t i=0; i < 9; i += 2) {
       includes.push_back(i);
     }
     opts.include(includes);
@@ -545,17 +545,17 @@ INSTANTIATE_TEST_CASE_P(TestReader1900, MatchTest,
     EXPECT_EQ("_col6", rootType.getFieldName(6));
     EXPECT_EQ("_col7", rootType.getFieldName(7));
     EXPECT_EQ("_col8", rootType.getFieldName(8));
-    EXPECT_EQ(INT, rootType.getSubtype(0).getKind());
-    EXPECT_EQ(STRING, rootType.getSubtype(1).getKind());
-    EXPECT_EQ(STRING, rootType.getSubtype(2).getKind());
-    EXPECT_EQ(STRING, rootType.getSubtype(3).getKind());
-    EXPECT_EQ(INT, rootType.getSubtype(4).getKind());
-    EXPECT_EQ(STRING, rootType.getSubtype(5).getKind());
-    EXPECT_EQ(INT, rootType.getSubtype(6).getKind());
-    EXPECT_EQ(INT, rootType.getSubtype(7).getKind());
-    EXPECT_EQ(INT, rootType.getSubtype(8).getKind());
+    EXPECT_EQ(INT, rootType.getSubtype(0)->getKind());
+    EXPECT_EQ(STRING, rootType.getSubtype(1)->getKind());
+    EXPECT_EQ(STRING, rootType.getSubtype(2)->getKind());
+    EXPECT_EQ(STRING, rootType.getSubtype(3)->getKind());
+    EXPECT_EQ(INT, rootType.getSubtype(4)->getKind());
+    EXPECT_EQ(STRING, rootType.getSubtype(5)->getKind());
+    EXPECT_EQ(INT, rootType.getSubtype(6)->getKind());
+    EXPECT_EQ(INT, rootType.getSubtype(7)->getKind());
+    EXPECT_EQ(INT, rootType.getSubtype(8)->getKind());
     for(unsigned int i=0; i < 9; ++i) {
-      EXPECT_EQ(i + 1, rootType.getSubtype(i).getColumnId())
+      EXPECT_EQ(i + 1, rootType.getSubtype(i)->getColumnId())
         << "fail on " << i;
     }
 
@@ -637,6 +637,11 @@ INSTANTIATE_TEST_CASE_P(TestReader1900, MatchTest,
     StructVectorBatch *fullStructBatch =
       dynamic_cast<StructVectorBatch*>(fullBatch.get());
     ASSERT_TRUE(fullStructBatch != nullptr);
+    std::cout << "OOM fullBatch " << reinterpret_cast<uint64_t>(fullStructBatch)
+              << "\n";
+    std::cout << "OOM fields.size() "
+              << fullStructBatch->fields.size()
+              << "\n";
     LongVectorBatch* fullLongVector =
       dynamic_cast<LongVectorBatch*>(fullStructBatch->fields[0]);
     ASSERT_TRUE(fullLongVector != nullptr);
@@ -911,11 +916,8 @@ TEST(Reader, selectColumns) {
     orc::ReaderOptions opts;
     std::ostringstream filename;
     filename << exampleDirectory << "/TestOrcFile.testSeek.orc";
-    std::list<int64_t> cols;
 
     // All columns
-    cols.push_back(0);
-    opts.include(cols);
     std::unique_ptr<orc::Reader> reader =
         orc::createReader(orc::readLocalFile(filename.str()), opts);
     std::vector<bool> c = reader->getSelectedColumns();
@@ -923,10 +925,30 @@ TEST(Reader, selectColumns) {
     for (unsigned int i=0; i < c.size(); i++) {
       EXPECT_TRUE(c[i]);
     }
+    std::unique_ptr<orc::ColumnVectorBatch> batch = reader->createRowBatch(1);
+    std::string line;
+    std::unique_ptr<orc::ColumnPrinter> printer =
+        createColumnPrinter(line, &reader->getSelectedType());
+    reader->next(*batch);
+    printer->reset(*batch);
+    printer->printRow(0);
+    std::ostringstream expected;
+    expected << "{\"boolean1\": true, \"byte1\": -76, "
+        << "\"short1\": 21684, \"int1\": -941468492, "
+        << "\"long1\": -6863419716327549772, \"float1\": 0.7762409, "
+        << "\"double1\": 0.77624090391187, \"bytes1\": [123, 108, 207, 27, 93, "
+        << "157, 139, 233, 181, 90, 14, 60, 34, 120, 26, 119, 231, 50, 155, 121], "
+        << "\"string1\": \"887336a7\", \"middle\": {\"list\": [{\"int1\": "
+        << "-941468492, \"string1\": \"887336a7\"}, {\"int1\": -1598014431, "
+        << "\"string1\": \"ba419d35-x\"}]}, \"list\": [], \"map\": [{\"key\": "
+        << "\"ba419d35-x\", \"value\": {\"int1\": -1598014431, \"string1\": "
+        << "\"ba419d35-x\"}}, {\"key\": \"887336a7\", \"value\": {\"int1\": "
+        << "-941468492, \"string1\": \"887336a7\"}}]}";
+    EXPECT_EQ(expected.str(), line);
 
     // Int column #2
-    cols.clear();
-    cols.push_back(2);
+    std::list<uint64_t> cols;
+    cols.push_back(1);
     opts.include(cols);
     reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
     c = reader->getSelectedColumns();
@@ -936,10 +958,19 @@ TEST(Reader, selectColumns) {
       else
         EXPECT_TRUE(!c[i]);
     }
+    batch = reader->createRowBatch(1);
+    line.clear();
+    printer = createColumnPrinter(line, &reader->getSelectedType());
+    reader->next(*batch);
+    printer->reset(*batch);
+    printer->printRow(0);
+    std::string expectedInt("{\"byte1\": -76}");
+    EXPECT_EQ(expectedInt, line);
+
 
     // Struct column #10
     cols.clear();
-    cols.push_back(10);
+    cols.push_back(9);
     opts.include(cols);
     reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
     c = reader->getSelectedColumns();
@@ -949,10 +980,21 @@ TEST(Reader, selectColumns) {
       else
         EXPECT_TRUE(!c[i]);
     }
+    batch = reader->createRowBatch(1);
+    line.clear();
+    printer = createColumnPrinter(line, &reader->getSelectedType());
+    reader->next(*batch);
+    printer->reset(*batch);
+    printer->printRow(0);
+    std::ostringstream expectedStruct;
+    expectedStruct << "{\"middle\": {\"list\": "
+        << "[{\"int1\": -941468492, \"string1\": \"887336a7\"}, "
+        << "{\"int1\": -1598014431, \"string1\": \"ba419d35-x\"}]}}";
+    EXPECT_EQ(expectedStruct.str(), line);
 
     // Array column #11
     cols.clear();
-    cols.push_back(11);
+    cols.push_back(10);
     opts.include(cols);
     reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
     c = reader->getSelectedColumns();
@@ -962,10 +1004,18 @@ TEST(Reader, selectColumns) {
       else
         EXPECT_TRUE(!c[i]);
     }
+    batch = reader->createRowBatch(1);
+    line.clear();
+    printer = createColumnPrinter(line, &reader->getSelectedType());
+    reader->next(*batch);
+    printer->reset(*batch);
+    printer->printRow(0);
+    std::string expectedArray("{\"list\": []}");
+    EXPECT_EQ(expectedArray, line);
 
     // Map column #12
     cols.clear();
-    cols.push_back(12);
+    cols.push_back(11);
     opts.include(cols);
     reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
     c = reader->getSelectedColumns();
@@ -975,6 +1025,18 @@ TEST(Reader, selectColumns) {
       else
         EXPECT_TRUE(!c[i]);
     }
+    batch = reader->createRowBatch(1);
+    line.clear();
+    printer = createColumnPrinter(line, &reader->getSelectedType());
+    reader->next(*batch);
+    printer->reset(*batch);
+    printer->printRow(0);
+    std::ostringstream expectedMap;
+    expectedMap << "{\"map\": [{\"key\": \"ba419d35-x\", \"value\": {\"int1\":"
+        << " -1598014431, \"string1\": \"ba419d35-x\"}}, {\"key\": "
+        << "\"887336a7\", \"value\": {\"int1\": -941468492, \"string1\": "
+        << "\"887336a7\"}}]}";
+    EXPECT_EQ(expectedMap.str(), line);
 }
 
 TEST(Reader, memoryUse) {
@@ -983,10 +1045,10 @@ TEST(Reader, memoryUse) {
   std::unique_ptr<orc::Reader> reader;
   std::unique_ptr<orc::ColumnVectorBatch> batch;
   orc::ReaderOptions opts;
-  std::list<int64_t> cols;
+  std::list<uint64_t> cols;
 
   // Int column
-  cols.push_back(2);
+  cols.push_back(1);
   opts.include(cols);
   reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
   EXPECT_EQ(483517, reader->getMemoryUse());
@@ -998,7 +1060,7 @@ TEST(Reader, memoryUse) {
 
   // Binary column
   cols.clear();
-  cols.push_back(8);
+  cols.push_back(7);
   opts.include(cols);
   reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
   EXPECT_EQ(835906, reader->getMemoryUse());
@@ -1008,7 +1070,7 @@ TEST(Reader, memoryUse) {
 
   // String column
   cols.clear();
-  cols.push_back(9);
+  cols.push_back(8);
   opts.include(cols);
   reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
   EXPECT_EQ(901442, reader->getMemoryUse());
@@ -1018,7 +1080,7 @@ TEST(Reader, memoryUse) {
 
   // Struct column (with a List subcolumn)
   cols.clear();
-  cols.push_back(10);
+  cols.push_back(9);
   opts.include(cols);
   reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
   EXPECT_EQ(1294658, reader->getMemoryUse());
@@ -1028,7 +1090,7 @@ TEST(Reader, memoryUse) {
 
   // List column
    cols.clear();
-   cols.push_back(11);
+   cols.push_back(10);
    opts.include(cols);
    reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
    EXPECT_EQ(1229122, reader->getMemoryUse());
@@ -1038,7 +1100,7 @@ TEST(Reader, memoryUse) {
 
   // Map column
   cols.clear();
-  cols.push_back(12);
+  cols.push_back(11);
   opts.include(cols);
   reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
   EXPECT_EQ(1491266, reader->getMemoryUse());
@@ -1048,7 +1110,9 @@ TEST(Reader, memoryUse) {
 
   // All columns
   cols.clear();
-  cols.push_back(0);
+  for(uint64_t c=0; c < 12; ++c) {
+    cols.push_back(c);
+  }
   opts.include(cols);
   reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
   EXPECT_EQ(4112706, reader->getMemoryUse());


[2/2] orc git commit: Fixed ORC-29: Enable ColumnPrinter to print only specified columns. (asandryh and omalley)

Posted by om...@apache.org.
Fixed ORC-29: Enable ColumnPrinter to print only specified columns.
(asandryh and omalley)

Signed-off-by: Owen O'Malley <om...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/3945f066
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/3945f066
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/3945f066

Branch: refs/heads/master
Commit: 3945f0663517b2d67d14c09ddb5990e3b569ffea
Parents: b39302f
Author: Aliaksei Sandryhaila <al...@hp.com>
Authored: Wed Jan 6 12:50:52 2016 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Wed Jan 6 12:55:43 2016 -0800

----------------------------------------------------------------------
 c++/include/CMakeLists.txt       |   1 +
 c++/include/orc/ColumnPrinter.hh |   5 +-
 c++/include/orc/Reader.hh        |  64 ++++--
 c++/include/orc/Type.hh          | 105 ++++++++++
 c++/include/orc/Vector.hh        |  75 -------
 c++/src/ColumnPrinter.cc         | 228 +++++++++++----------
 c++/src/ColumnReader.cc          |  10 +-
 c++/src/ColumnReader.hh          |   6 +-
 c++/src/Reader.cc                | 224 ++++++++++-----------
 c++/src/TypeImpl.cc              | 367 +++++++++++++++++++++++-----------
 c++/src/TypeImpl.hh              |  67 +++++--
 c++/test/CMakeLists.txt          |   1 +
 c++/test/TestColumnPrinter.cc    |  36 ++--
 c++/test/TestColumnReader.cc     | 225 +++++++++------------
 c++/test/TestType.cc             | 277 +++++++++++++++++++++++++
 tools/src/FileContents.cc        |  30 ++-
 tools/src/FileMemory.cc          |   4 +-
 tools/src/FileScan.cc            |   4 -
 tools/test/TestReader.cc         | 122 ++++++++---
 19 files changed, 1186 insertions(+), 665 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/include/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/c++/include/CMakeLists.txt b/c++/include/CMakeLists.txt
index 474c733..3891e71 100644
--- a/c++/include/CMakeLists.txt
+++ b/c++/include/CMakeLists.txt
@@ -77,6 +77,7 @@ install(FILES
   "orc/MemoryPool.hh"
   "orc/OrcFile.hh"
   "orc/Reader.hh"
+  "orc/Type.hh"
   "orc/Vector.hh"
   DESTINATION "include/orc"
   )

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/include/orc/ColumnPrinter.hh
----------------------------------------------------------------------
diff --git a/c++/include/orc/ColumnPrinter.hh b/c++/include/orc/ColumnPrinter.hh
index 17c1901..aa19214 100644
--- a/c++/include/orc/ColumnPrinter.hh
+++ b/c++/include/orc/ColumnPrinter.hh
@@ -34,12 +34,11 @@ namespace orc {
   class ColumnPrinter {
   protected:
     std::string &buffer;
-    const Type& type;
     bool hasNulls ;
     const char* notNull;
 
   public:
-    ColumnPrinter(std::string&, const Type&);
+    ColumnPrinter(std::string&);
     virtual ~ColumnPrinter();
     virtual void printRow(uint64_t rowId) = 0;
     // should be called once at the start of each batch of rows
@@ -47,6 +46,6 @@ namespace orc {
   };
 
   ORC_UNIQUE_PTR<ColumnPrinter> createColumnPrinter(std::string&,
-						    const Type& type);
+                                                    const Type* type);
 }
 #endif

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/include/orc/Reader.hh
----------------------------------------------------------------------
diff --git a/c++/include/orc/Reader.hh b/c++/include/orc/Reader.hh
index 316867d..d924fbf 100644
--- a/c++/include/orc/Reader.hh
+++ b/c++/include/orc/Reader.hh
@@ -20,7 +20,8 @@
 #define ORC_READER_HH
 
 #include "orc/orc-config.hh"
-#include "Vector.hh"
+#include "orc/Type.hh"
+#include "orc/Vector.hh"
 
 #include <memory>
 #include <string>
@@ -411,22 +412,23 @@ namespace orc {
     virtual ~ReaderOptions();
 
     /**
-     * Set the list of columns to read. All columns that are children of
-     * selected columns are automatically selected. The default value is
-     * {0}.
-     * @param include a list of columns to read
+     * For files that have structs as the top-level object, select the fields
+     * to read. The first field is 0, the second 1, and so on. By default,
+     * all columns are read. This option clears any previous setting of
+     * the selected columns.
+     * @param include a list of field indexes to read
      * @return this
      */
-    ReaderOptions& include(const std::list<int64_t>& include);
+    ReaderOptions& include(const std::list<uint64_t>& include);
 
     /**
-     * Set the list of columns to read. All columns that are children of
-     * selected columns are automatically selected. The default value is
-     * {0}.
-     * @param include a list of columns to read
+     * For files that have structs as the top-level object, select the fields
+     * to read by name. By default, all columns are read. This option clears
+     * any previous setting of the selected columns.
+     * @param include a list of field names to read
      * @return this
      */
-    ReaderOptions& include(std::vector<int64_t> include);
+    ReaderOptions& include(const std::list<std::string>& include);
 
     /**
      * Set the section of the file to process.
@@ -493,10 +495,26 @@ namespace orc {
     ReaderOptions& setMemoryPool(MemoryPool& pool);
 
     /**
+     * Were the include indexes set?
+     */
+    bool getIndexesSet() const;
+
+    /**
      * Get the list of selected columns to read. All children of the selected
      * columns are also selected.
      */
-    const std::list<int64_t>& getInclude() const;
+    const std::list<uint64_t>& getInclude() const;
+
+    /**
+     * Were the include names set?
+     */
+    bool getNamesSet() const;
+
+    /**
+     * Get the names of the selected columns to read. All children of the
+     * selected columns are also selected.
+     */
+    const std::list<std::string>& getIncludeNames() const;
 
     /**
      * Get the start of the range for the data being processed.
@@ -652,12 +670,23 @@ namespace orc {
     getColumnStatistics(uint32_t columnId) const = 0;
 
     /**
-     * Get the type of the rows in the file. The top level is always a struct.
+     * Get the type of the rows in the file. The top level is typically a
+     * struct.
      * @return the root type
      */
     virtual const Type& getType() const = 0;
 
     /**
+     * Get the selected type of the rows in the file. The file's row type
+     * is projected down to just the selected columns. Thus, if the file's
+     * type is struct<col0:int,col1:double,col2:string> and the selected
+     * columns are "col0,col2" the selected type would be
+     * struct<col0:int,col2:string>.
+     * @return the root type
+     */
+    virtual const Type& getSelectedType() const = 0;
+
+    /**
      * Get the selected columns of the file.
      */
     virtual const std::vector<bool> getSelectedColumns() const = 0;
@@ -667,8 +696,8 @@ namespace orc {
      * @param size the number of rows to read
      * @return a new ColumnVectorBatch to read into
      */
-    virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch
-    (uint64_t size) const = 0;
+    virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch(uint64_t size
+                                                             ) const = 0;
 
     /**
      * Read the next row batch from the current position.
@@ -713,9 +742,10 @@ namespace orc {
     /**
      * Estimate an upper bound on heap memory allocation by the Reader
      * based on the information in the file footer.
-     * The bound is less tight if only few columns are read or compression is used.
+     * The bound is less tight if only a few columns are read or compression is
+     * used.
      * @param stripeIx index of the stripe to be read (if not specified,
-    * all stripes are considered).
+     *        all stripes are considered).
      * @return upper bound on memory use
      */
     virtual uint64_t getMemoryUse(int stripeIx=-1) = 0;

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/include/orc/Type.hh
----------------------------------------------------------------------
diff --git a/c++/include/orc/Type.hh b/c++/include/orc/Type.hh
new file mode 100644
index 0000000..25b8f53
--- /dev/null
+++ b/c++/include/orc/Type.hh
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ORC_TYPE_HH
+#define ORC_TYPE_HH
+
+#include "orc/orc-config.hh"
+#include "orc/Vector.hh"
+#include "MemoryPool.hh"
+
+namespace orc {
+
+  enum TypeKind {
+    BOOLEAN = 0,
+    BYTE = 1,
+    SHORT = 2,
+    INT = 3,
+    LONG = 4,
+    FLOAT = 5,
+    DOUBLE = 6,
+    STRING = 7,
+    BINARY = 8,
+    TIMESTAMP = 9,
+    LIST = 10,
+    MAP = 11,
+    STRUCT = 12,
+    UNION = 13,
+    DECIMAL = 14,
+    DATE = 15,
+    VARCHAR = 16,
+    CHAR = 17
+  };
+
+  class Type {
+  public:
+    virtual ~Type();
+    virtual uint64_t getColumnId() const = 0;
+    virtual uint64_t getMaximumColumnId() const = 0;
+    virtual TypeKind getKind() const = 0;
+    virtual uint64_t getSubtypeCount() const = 0;
+    virtual const Type* getSubtype(uint64_t childId) const = 0;
+    virtual const std::string& getFieldName(uint64_t childId) const = 0;
+    virtual uint64_t getMaximumLength() const = 0;
+    virtual uint64_t getPrecision() const = 0;
+    virtual uint64_t getScale() const = 0;
+    virtual std::string toString() const = 0;
+
+    /**
+     * Create a row batch for this type.
+     */
+    virtual ORC_UNIQUE_PTR<ColumnVectorBatch> createRowBatch(uint64_t size,
+                                                             MemoryPool& pool
+                                                             ) const = 0;
+
+    /**
+     * Add a new field to a struct type.
+     * @param fieldName the name of the new field
+     * @param fieldType the type of the new field
+     * @return a pointer to the struct type
+     */
+    virtual Type* addStructField(const std::string& fieldName,
+                                 ORC_UNIQUE_PTR<Type> fieldType) = 0;
+
+    /**
+     * Add a new child to a union type.
+     * @param fieldType the type of the new child
+     * @return a pointer to the union type
+     */
+    virtual Type* addUnionChild(ORC_UNIQUE_PTR<Type> fieldType) = 0;
+  };
+
+  const int64_t DEFAULT_DECIMAL_SCALE = 18;
+  const int64_t DEFAULT_DECIMAL_PRECISION = 38;
+
+  ORC_UNIQUE_PTR<Type> createPrimitiveType(TypeKind kind);
+  ORC_UNIQUE_PTR<Type> createCharType(TypeKind kind,
+                                      uint64_t maxLength);
+  ORC_UNIQUE_PTR<Type>
+                createDecimalType(uint64_t precision=
+                                    DEFAULT_DECIMAL_PRECISION,
+                                  uint64_t scale=DEFAULT_DECIMAL_SCALE);
+
+  ORC_UNIQUE_PTR<Type> createStructType();
+  ORC_UNIQUE_PTR<Type> createListType(ORC_UNIQUE_PTR<Type> elements);
+  ORC_UNIQUE_PTR<Type> createMapType(ORC_UNIQUE_PTR<Type> key,
+                                      ORC_UNIQUE_PTR<Type> value);
+  ORC_UNIQUE_PTR<Type> createUnionType();
+
+}
+#endif

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/include/orc/Vector.hh
----------------------------------------------------------------------
diff --git a/c++/include/orc/Vector.hh b/c++/include/orc/Vector.hh
index 8037400..8f6a0da 100644
--- a/c++/include/orc/Vector.hh
+++ b/c++/include/orc/Vector.hh
@@ -33,81 +33,6 @@
 
 namespace orc {
 
-  enum TypeKind {
-    BOOLEAN = 0,
-    BYTE = 1,
-    SHORT = 2,
-    INT = 3,
-    LONG = 4,
-    FLOAT = 5,
-    DOUBLE = 6,
-    STRING = 7,
-    BINARY = 8,
-    TIMESTAMP = 9,
-    LIST = 10,
-    MAP = 11,
-    STRUCT = 12,
-    UNION = 13,
-    DECIMAL = 14,
-    DATE = 15,
-    VARCHAR = 16,
-    CHAR = 17
-  };
-
-  std::string kind2String(TypeKind t);
-
-  class Type {
-  public:
-    virtual ~Type();
-    virtual int64_t assignIds(int64_t root) = 0;
-    virtual int64_t getColumnId() const = 0;
-    virtual TypeKind getKind() const = 0;
-    virtual uint64_t getSubtypeCount() const = 0;
-    virtual const Type& getSubtype(uint64_t typeId) const = 0;
-    virtual const std::string& getFieldName(uint64_t fieldId) const = 0;
-    virtual uint64_t getMaximumLength() const = 0;
-    virtual uint64_t getPrecision() const = 0;
-    virtual uint64_t getScale() const = 0;
-    virtual std::string toString() const = 0;
-
-    /**
-     * Add a new field to a struct type.
-     * @param fieldType the type of the new field
-     * @param fieldName the name of the new field
-     * @return a reference to the field's type
-     */
-    virtual Type& addStructField(ORC_UNIQUE_PTR<Type> fieldType,
-                                 const std::string& fieldName) = 0;
-  };
-
-  const int64_t DEFAULT_DECIMAL_SCALE = 18;
-  const int64_t DEFAULT_DECIMAL_PRECISION = 38;
-
-  ORC_UNIQUE_PTR<Type> createPrimitiveType(TypeKind kind);
-  ORC_UNIQUE_PTR<Type> createCharType(TypeKind kind,
-				      uint64_t maxLength);
-  ORC_UNIQUE_PTR<Type>
-                createDecimalType(uint64_t precision=
-                                    DEFAULT_DECIMAL_PRECISION,
-                                  uint64_t scale=DEFAULT_DECIMAL_SCALE);
-
-  ORC_UNIQUE_PTR<Type> createStructType();
-  ORC_UNIQUE_PTR<Type>
-    createStructType(std::vector<Type*> types,
-                      std::vector<std::string> fieldNames);
-
-#ifdef ORC_CXX_HAS_INITIALIZER_LIST
-  std::unique_ptr<Type> createStructType(
-      std::initializer_list<std::unique_ptr<Type> > types,
-      std::initializer_list<std::string> fieldNames);
-#endif
-
-  ORC_UNIQUE_PTR<Type> createListType(ORC_UNIQUE_PTR<Type> elements);
-  ORC_UNIQUE_PTR<Type> createMapType(ORC_UNIQUE_PTR<Type> key,
-                                      ORC_UNIQUE_PTR<Type> value);
-  ORC_UNIQUE_PTR<Type>
-    createUnionType(std::vector<Type*> types);
-
   /**
    * The base class for each of the column vectors. This class handles
    * the generic attributes such as number of elements, capacity, and

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/src/ColumnPrinter.cc
----------------------------------------------------------------------
diff --git a/c++/src/ColumnPrinter.cc b/c++/src/ColumnPrinter.cc
index aa90be6..764da01 100644
--- a/c++/src/ColumnPrinter.cc
+++ b/c++/src/ColumnPrinter.cc
@@ -33,11 +33,19 @@
 
 namespace orc {
 
+  class VoidColumnPrinter: public ColumnPrinter {
+  public:
+    VoidColumnPrinter(std::string&);
+    ~VoidColumnPrinter() {}
+    void printRow(uint64_t rowId) override;
+    void reset(const ColumnVectorBatch& batch) override;
+  };
+
   class BooleanColumnPrinter: public ColumnPrinter {
   private:
     const int64_t* data;
   public:
-    BooleanColumnPrinter(std::string&, const Type&);
+    BooleanColumnPrinter(std::string&);
     ~BooleanColumnPrinter() {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
@@ -47,7 +55,7 @@ namespace orc {
   private:
     const int64_t* data;
   public:
-    LongColumnPrinter(std::string&, const Type&);
+    LongColumnPrinter(std::string&);
     ~LongColumnPrinter() {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
@@ -59,7 +67,7 @@ namespace orc {
     const bool isFloat;
 
   public:
-    DoubleColumnPrinter(std::string&, const Type&);
+    DoubleColumnPrinter(std::string&, const Type& type);
     virtual ~DoubleColumnPrinter() {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
@@ -71,7 +79,7 @@ namespace orc {
     const int64_t* nanoseconds;
 
   public:
-    TimestampColumnPrinter(std::string&, const Type&);
+    TimestampColumnPrinter(std::string&);
     ~TimestampColumnPrinter() {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
@@ -82,7 +90,7 @@ namespace orc {
     const int64_t* data;
 
   public:
-    DateColumnPrinter(std::string&, const Type& type);
+    DateColumnPrinter(std::string&);
     ~DateColumnPrinter() {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
@@ -93,7 +101,7 @@ namespace orc {
     const int64_t* data;
     int32_t scale;
   public:
-    Decimal64ColumnPrinter(std::string&, const Type& type);
+    Decimal64ColumnPrinter(std::string&);
     ~Decimal64ColumnPrinter() {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
@@ -104,7 +112,7 @@ namespace orc {
     const Int128* data;
     int32_t scale;
   public:
-    Decimal128ColumnPrinter(std::string&, const Type& type);
+    Decimal128ColumnPrinter(std::string&);
     ~Decimal128ColumnPrinter() {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
@@ -115,7 +123,7 @@ namespace orc {
     const char* const * start;
     const int64_t* length;
   public:
-    StringColumnPrinter(std::string&, const Type& type);
+    StringColumnPrinter(std::string&);
     virtual ~StringColumnPrinter() {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
@@ -126,7 +134,7 @@ namespace orc {
     const char* const * start;
     const int64_t* length;
   public:
-    BinaryColumnPrinter(std::string&, const Type& type);
+    BinaryColumnPrinter(std::string&);
     virtual ~BinaryColumnPrinter() {}
     void printRow(uint64_t rowId) override;
     void reset(const ColumnVectorBatch& batch) override;
@@ -173,6 +181,7 @@ namespace orc {
   class StructColumnPrinter: public ColumnPrinter {
   private:
     std::vector<ColumnPrinter*> fieldPrinter;
+    std::vector<std::string> fieldNames;
   public:
     StructColumnPrinter(std::string&, const Type& type);
     virtual ~StructColumnPrinter();
@@ -189,9 +198,8 @@ namespace orc {
     file.append(ptr, len);
   }
 
-  ColumnPrinter::ColumnPrinter(std::string& _buffer, const Type& _type
-                               ): buffer(_buffer),
-                                  type(_type) {
+  ColumnPrinter::ColumnPrinter(std::string& _buffer
+                               ): buffer(_buffer) {
     notNull = nullptr;
     hasNulls = false;
   }
@@ -210,76 +218,92 @@ namespace orc {
   }
 
   std::unique_ptr<ColumnPrinter> createColumnPrinter(std::string& buffer,
-                                                     const Type& type) {
-    ColumnPrinter *result;
-    switch(static_cast<int64_t>(type.getKind())) {
-    case BOOLEAN:
-      result = new BooleanColumnPrinter(buffer, type);
-      break;
-
-    case BYTE:
-    case SHORT:
-    case INT:
-    case LONG:
-      result = new LongColumnPrinter(buffer, type);
-      break;
-
-    case FLOAT:
-    case DOUBLE:
-      result = new DoubleColumnPrinter(buffer, type);
-      break;
-
-    case STRING:
-    case VARCHAR :
-    case CHAR:
-      result = new StringColumnPrinter(buffer, type);
-      break;
-
-    case BINARY:
-      result = new BinaryColumnPrinter(buffer, type);
-      break;
-
-    case TIMESTAMP:
-      result = new TimestampColumnPrinter(buffer, type);
-      break;
-
-    case LIST:
-      result = new ListColumnPrinter(buffer, type);
-      break;
-
-    case MAP:
-      result = new MapColumnPrinter(buffer, type);
-      break;
-
-    case STRUCT:
-      result = new StructColumnPrinter(buffer, type);
-      break;
-
-    case DECIMAL:
-      if (type.getPrecision() == 0 || type.getPrecision() > 18) {
-        result = new Decimal128ColumnPrinter(buffer, type);
-      } else {
-        result = new Decimal64ColumnPrinter(buffer, type);
-      }
-      break;
+                                                     const Type* type) {
+    ColumnPrinter *result = nullptr;
+    if (type == nullptr) {
+      result = new VoidColumnPrinter(buffer);
+    } else {
+      switch(static_cast<int64_t>(type->getKind())) {
+      case BOOLEAN:
+        result = new BooleanColumnPrinter(buffer);
+        break;
+
+      case BYTE:
+      case SHORT:
+      case INT:
+      case LONG:
+        result = new LongColumnPrinter(buffer);
+        break;
+
+      case FLOAT:
+      case DOUBLE:
+        result = new DoubleColumnPrinter(buffer, *type);
+        break;
+
+      case STRING:
+      case VARCHAR :
+      case CHAR:
+        result = new StringColumnPrinter(buffer);
+        break;
+
+      case BINARY:
+        result = new BinaryColumnPrinter(buffer);
+        break;
+
+      case TIMESTAMP:
+        result = new TimestampColumnPrinter(buffer);
+        break;
+
+      case LIST:
+        result = new ListColumnPrinter(buffer, *type);
+        break;
+
+      case MAP:
+        result = new MapColumnPrinter(buffer, *type);
+        break;
+
+      case STRUCT:
+        result = new StructColumnPrinter(buffer, *type);
+        break;
+
+      case DECIMAL:
+        if (type->getPrecision() == 0 || type->getPrecision() > 18) {
+          result = new Decimal128ColumnPrinter(buffer);
+        } else {
+          result = new Decimal64ColumnPrinter(buffer);
+        }
+        break;
 
-    case DATE:
-      result = new DateColumnPrinter(buffer, type);
-      break;
+      case DATE:
+        result = new DateColumnPrinter(buffer);
+        break;
 
-    case UNION:
-      result = new UnionColumnPrinter(buffer, type);
-      break;
+      case UNION:
+        result = new UnionColumnPrinter(buffer, *type);
+        break;
 
-    default:
-      throw std::logic_error("unknown batch type");
+      default:
+        throw std::logic_error("unknown batch type");
+      }
     }
     return std::unique_ptr<ColumnPrinter>(result);
   }
 
-  LongColumnPrinter::LongColumnPrinter(std::string& buffer,
-                                       const Type& type
-                                       ): ColumnPrinter(buffer, type),
+  VoidColumnPrinter::VoidColumnPrinter(std::string& buffer
+                                       ): ColumnPrinter(buffer) {
+    // PASS
+  }
+
+  void VoidColumnPrinter::reset(const  ColumnVectorBatch&) {
+    // PASS
+  }
+
+  void VoidColumnPrinter::printRow(uint64_t) {
+    writeString(buffer, "null");
+  }
+
+  LongColumnPrinter::LongColumnPrinter(std::string& buffer
+                                       ): ColumnPrinter(buffer),
                                           data(nullptr) {
     // PASS
   }
@@ -302,7 +326,7 @@ namespace orc {
 
   DoubleColumnPrinter::DoubleColumnPrinter(std::string& buffer,
                                            const Type& type
-                                           ): ColumnPrinter(buffer, type),
+                                           ): ColumnPrinter(buffer),
                                               data(nullptr),
                                               isFloat(type.getKind() == FLOAT){
     // PASS
@@ -324,10 +348,8 @@ namespace orc {
     }
   }
 
-  Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& buffer,
-                                                 const  Type& type
-                                                 ): ColumnPrinter(buffer,
-                                                                  type),
+  Decimal64ColumnPrinter::Decimal64ColumnPrinter(std::string& buffer
+                                                 ): ColumnPrinter(buffer),
                                                     data(nullptr),
                                                     scale(0) {
     // PASS
@@ -376,10 +398,8 @@ namespace orc {
     }
   }
 
-  Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& buffer,
-                                                   const Type& type
-                                                   ): ColumnPrinter(buffer,
-                                                                    type),
+  Decimal128ColumnPrinter::Decimal128ColumnPrinter(std::string& buffer
+                                                   ): ColumnPrinter(buffer),
                                                       data(nullptr),
                                                       scale(0) {
      // PASS
@@ -399,9 +419,8 @@ namespace orc {
      }
    }
 
-  StringColumnPrinter::StringColumnPrinter(std::string& buffer,
-                                           const Type& type
-                                           ): ColumnPrinter(buffer, type),
+  StringColumnPrinter::StringColumnPrinter(std::string& buffer
+                                           ): ColumnPrinter(buffer),
                                               start(nullptr),
                                               length(nullptr) {
     // PASS
@@ -453,7 +472,7 @@ namespace orc {
 
   ListColumnPrinter::ListColumnPrinter(std::string& buffer,
                                        const Type& type
-                                       ): ColumnPrinter(buffer, type),
+                                       ): ColumnPrinter(buffer),
                                           offsets(nullptr) {
     elementPrinter = createColumnPrinter(buffer, type.getSubtype(0));
   }
@@ -482,7 +501,7 @@ namespace orc {
 
   MapColumnPrinter::MapColumnPrinter(std::string& buffer,
                                      const Type& type
-                                     ): ColumnPrinter(buffer, type),
+                                     ): ColumnPrinter(buffer),
                                         offsets(nullptr) {
     keyPrinter = createColumnPrinter(buffer, type.getSubtype(0));
     elementPrinter = createColumnPrinter(buffer, type.getSubtype(1));
@@ -517,7 +536,7 @@ namespace orc {
 
   UnionColumnPrinter::UnionColumnPrinter(std::string& buffer,
                                            const Type& type
-                                         ): ColumnPrinter(buffer, type),
+                                         ): ColumnPrinter(buffer),
                                             tags(nullptr),
                                             offsets(nullptr) {
     for(unsigned int i=0; i < type.getSubtypeCount(); ++i) {
@@ -560,9 +579,11 @@ namespace orc {
 
   StructColumnPrinter::StructColumnPrinter(std::string& buffer,
                                            const Type& type
-                                           ): ColumnPrinter(buffer, type) {
+                                           ): ColumnPrinter(buffer) {
     for(unsigned int i=0; i < type.getSubtypeCount(); ++i) {
-      fieldPrinter.push_back(createColumnPrinter(buffer, type.getSubtype(i))
+      fieldNames.push_back(type.getFieldName(i));
+      fieldPrinter.push_back(createColumnPrinter(buffer,
+                                                 type.getSubtype(i))
                              .release());
     }
   }
@@ -592,7 +613,7 @@ namespace orc {
           writeString(buffer, ", ");
         }
         writeChar(buffer, '"');
-        writeString(buffer, type.getFieldName(i).c_str());
+        writeString(buffer, fieldNames[i].c_str());
         writeString(buffer, "\": ");
         fieldPrinter[i]->printRow(rowId);
       }
@@ -600,9 +621,8 @@ namespace orc {
     }
   }
 
-  DateColumnPrinter::DateColumnPrinter(std::string& buffer,
-                                       const Type& type
-                                       ): ColumnPrinter(buffer, type),
+  DateColumnPrinter::DateColumnPrinter(std::string& buffer
+                                       ): ColumnPrinter(buffer),
                                           data(nullptr) {
     // PASS
   }
@@ -627,9 +647,8 @@ namespace orc {
     data = dynamic_cast<const LongVectorBatch&>(batch).data.data();
   }
 
-  BooleanColumnPrinter::BooleanColumnPrinter(std::string& buffer,
-                                             const Type& type
-                                             ): ColumnPrinter(buffer, type),
+  BooleanColumnPrinter::BooleanColumnPrinter(std::string& buffer
+                                             ): ColumnPrinter(buffer),
                                                 data(nullptr) {
     // PASS
   }
@@ -647,9 +666,8 @@ namespace orc {
     data = dynamic_cast<const LongVectorBatch&>(batch).data.data();
   }
 
-  BinaryColumnPrinter::BinaryColumnPrinter(std::string& buffer,
-                                           const Type& type
-                                           ): ColumnPrinter(buffer, type),
+  BinaryColumnPrinter::BinaryColumnPrinter(std::string& buffer
+                                           ): ColumnPrinter(buffer),
                                               start(nullptr),
                                               length(nullptr) {
     // PASS
@@ -679,10 +697,8 @@ namespace orc {
     length = dynamic_cast<const StringVectorBatch&>(batch).length.data();
   }
 
-  TimestampColumnPrinter::TimestampColumnPrinter(std::string& buffer,
-                                                 const Type& type
-                                                 ): ColumnPrinter(buffer,
-                                                                  type),
+  TimestampColumnPrinter::TimestampColumnPrinter(std::string& buffer
+                                                 ): ColumnPrinter(buffer),
                                                     seconds(nullptr),
                                                     nanoseconds(nullptr) {
     // PASS

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/src/ColumnReader.cc
----------------------------------------------------------------------
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index 84e6db2..ae4d9b6 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -769,7 +769,7 @@ namespace orc {
     switch (static_cast<int64_t>(stripe.getEncoding(columnId).kind())) {
     case proto::ColumnEncoding_Kind_DIRECT:
       for(unsigned int i=0; i < type.getSubtypeCount(); ++i) {
-        const Type& child = type.getSubtype(i);
+        const Type& child = *type.getSubtype(i);
         if (selectedColumns[static_cast<uint64_t>(child.getColumnId())]) {
           children.push_back(buildReader(child, stripe).release());
         }
@@ -836,7 +836,7 @@ namespace orc {
                                             proto::Stream_Kind_LENGTH,
                                             true),
                            false, vers, memoryPool);
-    const Type& childType = type.getSubtype(0);
+    const Type& childType = *type.getSubtype(0);
     if (selectedColumns[static_cast<uint64_t>(childType.getColumnId())]) {
       child = buildReader(childType, stripe);
     }
@@ -929,11 +929,11 @@ namespace orc {
                                             proto::Stream_Kind_LENGTH,
                                             true),
                            false, vers, memoryPool);
-    const Type& keyType = type.getSubtype(0);
+    const Type& keyType = *type.getSubtype(0);
     if (selectedColumns[static_cast<uint64_t>(keyType.getColumnId())]) {
       keyReader = buildReader(keyType, stripe);
     }
-    const Type& elementType = type.getSubtype(1);
+    const Type& elementType = *type.getSubtype(1);
     if (selectedColumns[static_cast<uint64_t>(elementType.getColumnId())]) {
       elementReader = buildReader(elementType, stripe);
     }
@@ -1040,7 +1040,7 @@ namespace orc {
     // figure out which types are selected
     const std::vector<bool> selectedColumns = stripe.getSelectedColumns();
     for(unsigned int i=0; i < numChildren; ++i) {
-      const Type &child = type.getSubtype(i);
+      const Type &child = *type.getSubtype(i);
       if (selectedColumns[static_cast<size_t>(child.getColumnId())]) {
         childrenReader[i] = buildReader(child, stripe).release();
       }

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/src/ColumnReader.hh
----------------------------------------------------------------------
diff --git a/c++/src/ColumnReader.hh b/c++/src/ColumnReader.hh
index 73db911..142d41e 100644
--- a/c++/src/ColumnReader.hh
+++ b/c++/src/ColumnReader.hh
@@ -45,7 +45,7 @@ namespace orc {
     /**
      * Get the encoding for the given column for this stripe.
      */
-    virtual proto::ColumnEncoding getEncoding(int64_t columnId) const = 0;
+    virtual proto::ColumnEncoding getEncoding(uint64_t columnId) const = 0;
 
     /**
      * Get the stream for the given column/kind in this stripe.
@@ -55,7 +55,7 @@ namespace orc {
      * @return the new stream
      */
     virtual std::unique_ptr<SeekableInputStream>
-                    getStream(int64_t columnId,
+                    getStream(uint64_t columnId,
                               proto::Stream_Kind kind,
                               bool shouldStream) const = 0;
 
@@ -78,7 +78,7 @@ namespace orc {
   class ColumnReader {
   protected:
     std::unique_ptr<ByteRleDecoder> notNullDecoder;
-    int64_t columnId;
+    uint64_t columnId;
     MemoryPool& memoryPool;
 
   public:

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/src/Reader.cc
----------------------------------------------------------------------
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 58f441c..940ef16 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -39,7 +39,10 @@
 namespace orc {
 
   struct ReaderOptionsPrivate {
-    std::list<int64_t> includedColumns;
+    bool setIndexes;
+    bool setNames;
+    std::list<uint64_t> includedColumnIndexes;
+    std::list<std::string> includedColumnNames;
     uint64_t dataStart;
     uint64_t dataLength;
     uint64_t tailLocation;
@@ -50,7 +53,8 @@ namespace orc {
     std::string serializedTail;
 
     ReaderOptionsPrivate() {
-      includedColumns.assign(1,0);
+      setIndexes = false;
+      setNames = false;
       dataStart = 0;
       dataLength = std::numeric_limits<uint64_t>::max();
       tailLocation = std::numeric_limits<uint64_t>::max();
@@ -91,13 +95,20 @@ namespace orc {
     // PASS
   }
 
-  ReaderOptions& ReaderOptions::include(const std::list<int64_t>& include) {
-    privateBits->includedColumns.assign(include.begin(), include.end());
+  ReaderOptions& ReaderOptions::include(const std::list<uint64_t>& include) {
+    privateBits->setIndexes = true;
+    privateBits->includedColumnIndexes.assign(include.begin(), include.end());
+    privateBits->setNames = false;
+    privateBits->includedColumnNames.clear();
     return *this;
   }
 
-  ReaderOptions& ReaderOptions::include(std::vector<int64_t> include) {
-    privateBits->includedColumns.assign(include.begin(), include.end());
+  ReaderOptions& ReaderOptions::include
+       (const std::list<std::string>& include) {
+    privateBits->setNames = true;
+    privateBits->includedColumnNames.assign(include.begin(), include.end());
+    privateBits->setIndexes = false;
+    privateBits->includedColumnIndexes.clear();
     return *this;
   }
 
@@ -128,8 +139,20 @@ namespace orc {
     return privateBits->memoryPool;
   }
 
-  const std::list<int64_t>& ReaderOptions::getInclude() const {
-    return privateBits->includedColumns;
+  bool ReaderOptions::getIndexesSet() const {
+    return privateBits->setIndexes;
+  }
+
+  const std::list<uint64_t>& ReaderOptions::getInclude() const {
+    return privateBits->includedColumnIndexes;
+  }
+
+  bool ReaderOptions::getNamesSet() const {
+    return privateBits->setNames;
+  }
+
+  const std::list<std::string>& ReaderOptions::getIncludeNames() const {
+    return privateBits->includedColumnNames;
   }
 
   uint64_t ReaderOptions::getOffset() const {
@@ -875,6 +898,7 @@ namespace orc {
     DataBuffer<uint64_t> firstRowOfStripe;
     uint64_t numberOfStripes;
     std::unique_ptr<Type> schema;
+    mutable std::unique_ptr<Type> selectedSchema;
 
     // metadata
     mutable std::unique_ptr<proto::Metadata> metadata;
@@ -897,9 +921,8 @@ namespace orc {
     void checkOrcVersion();
     void selectType(const Type& type);
     void readMetadata() const;
-    std::unique_ptr<ColumnVectorBatch> createRowBatch(const Type& type,
-                                                      uint64_t capacity
-                                                      ) const;
+    void updateSelected(const std::list<uint64_t>& fieldIds);
+    void updateSelected(const std::list<std::string>& fieldNames);
 
   public:
     /**
@@ -956,6 +979,8 @@ namespace orc {
 
     const Type& getType() const override;
 
+    const Type& getSelectedType() const override;
+
     const std::vector<bool> getSelectedColumns() const override;
 
     std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size
@@ -1062,30 +1087,23 @@ namespace orc {
     }
 
     schema = convertType(footer->types(0), *footer);
-    schema->assignIds(0);
 
     selectedColumns.assign(static_cast<size_t>(footer->types_size()), false);
-
-    const std::list<int64_t>& included = options.getInclude();
-    for(std::list<int64_t>::const_iterator columnId = included.begin();
-        columnId != included.end(); ++columnId) {
-      if (*columnId == 0) {
-        selectType(*(schema.get()));
-      } else if (*columnId <=
-                 static_cast<int64_t>(schema->getSubtypeCount())) {
-        selectType(schema->getSubtype(static_cast<uint64_t>(*columnId-1)));
-      }
-    }
-    if (included.size() > 0) {
-      selectedColumns[0] = true;
+    if (schema->getKind() == STRUCT && options.getIndexesSet()) {
+      updateSelected(options.getInclude());
+    } else if (schema->getKind() == STRUCT && options.getNamesSet()) {
+      updateSelected(options.getIncludeNames());
+    } else {
+      std::fill(selectedColumns.begin(), selectedColumns.end(), true);
     }
+    selectedColumns[0] = true;
   }
 
   void ReaderImpl::selectType(const Type& type) {
     if (!selectedColumns[static_cast<size_t>(type.getColumnId())]) {
       selectedColumns[static_cast<size_t>(type.getColumnId())] = true;
       for (uint64_t i=0; i < type.getSubtypeCount(); i++) {
-        selectType(type.getSubtype(i));
+        selectType(*type.getSubtype(i));
       }
     }
   }
@@ -1206,6 +1224,14 @@ namespace orc {
     return *(schema.get());
   }
 
+  const Type& ReaderImpl::getSelectedType() const {
+    if (selectedSchema.get() == nullptr) {
+      selectedSchema = buildSelectedType(schema.get(),
+                                         selectedColumns);
+    }
+    return *(selectedSchema.get());
+  }
+
   uint64_t ReaderImpl::getRowNumber() const {
     return previousRow;
   }
@@ -1298,10 +1324,10 @@ namespace orc {
     }
 
     currentStripe = seekToStripe;
-    currentRowInStripe = 0;
-    std::unique_ptr<orc::ColumnVectorBatch> batch =
-        createRowBatch(rowNumber-firstRowOfStripe[currentStripe]);
-    next(*batch);
+    currentRowInStripe = rowNumber - firstRowOfStripe[currentStripe];
+    previousRow = rowNumber;
+    startNextStripe();
+    reader->skip(currentRowInStripe);
   }
 
   bool ReaderImpl::hasCorrectStatistics() const {
@@ -1353,10 +1379,11 @@ namespace orc {
 
     virtual const std::vector<bool> getSelectedColumns() const override;
 
-    virtual proto::ColumnEncoding getEncoding(int64_t columnId) const override;
+    virtual proto::ColumnEncoding getEncoding(uint64_t columnId
+                                              ) const override;
 
     virtual std::unique_ptr<SeekableInputStream>
-    getStream(int64_t columnId,
+    getStream(uint64_t columnId,
               proto::Stream_Kind kind,
               bool shouldStream) const override;
 
@@ -1497,7 +1524,7 @@ namespace orc {
     return reader.getSelectedColumns();
   }
 
-  proto::ColumnEncoding StripeStreamsImpl::getEncoding(int64_t columnId
+  proto::ColumnEncoding StripeStreamsImpl::getEncoding(uint64_t columnId
                                                        ) const {
     return footer.columns(static_cast<int>(columnId));
   }
@@ -1507,7 +1534,7 @@ namespace orc {
   }
 
   std::unique_ptr<SeekableInputStream>
-  StripeStreamsImpl::getStream(int64_t columnId,
+  StripeStreamsImpl::getStream(uint64_t columnId,
                                proto::Stream_Kind kind,
                                bool shouldStream) const {
     uint64_t offset = stripeStart;
@@ -1591,96 +1618,8 @@ namespace orc {
   }
 
   std::unique_ptr<ColumnVectorBatch> ReaderImpl::createRowBatch
-  (const Type& type, uint64_t capacity) const {
-    ColumnVectorBatch* result = nullptr;
-    const Type* subtype;
-    switch (static_cast<int64_t>(type.getKind())) {
-    case BOOLEAN:
-    case BYTE:
-    case SHORT:
-    case INT:
-    case LONG:
-    case DATE:
-      result = new LongVectorBatch(capacity, memoryPool);
-      break;
-    case FLOAT:
-    case DOUBLE:
-      result = new DoubleVectorBatch(capacity, memoryPool);
-      break;
-    case STRING:
-    case BINARY:
-    case CHAR:
-    case VARCHAR:
-      result = new StringVectorBatch(capacity, memoryPool);
-      break;
-    case TIMESTAMP:
-      result = new TimestampVectorBatch(capacity, memoryPool);
-      break;
-    case STRUCT:
-      {
-        StructVectorBatch *structResult =
-          new StructVectorBatch(capacity, memoryPool);
-        result = structResult;
-        for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
-          subtype = &(type.getSubtype(i));
-          if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
-            structResult->fields.push_back(createRowBatch(*subtype,
-                                                          capacity).release());
-          }
-        }
-      }
-      break;
-    case LIST:
-      result = new ListVectorBatch(capacity, memoryPool);
-      subtype = &(type.getSubtype(0));
-      if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
-        dynamic_cast<ListVectorBatch*>(result)->elements =
-          createRowBatch(*subtype, capacity);
-      }
-      break;
-    case MAP:
-      result = new MapVectorBatch(capacity, memoryPool);
-      subtype = &(type.getSubtype(0));
-      if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
-        dynamic_cast<MapVectorBatch*>(result)->keys =
-          createRowBatch(*subtype, capacity);
-      }
-      subtype = &(type.getSubtype(1));
-      if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
-        dynamic_cast<MapVectorBatch*>(result)->elements =
-          createRowBatch(*subtype, capacity);
-      }
-      break;
-    case DECIMAL:
-      if (type.getPrecision() == 0 || type.getPrecision() > 18) {
-        result = new Decimal128VectorBatch(capacity, memoryPool);
-      } else {
-        result = new Decimal64VectorBatch(capacity, memoryPool);
-      }
-      break;
-    case UNION:
-      {
-        UnionVectorBatch *unionResult =
-          new UnionVectorBatch(capacity, memoryPool);
-        result = unionResult;
-        for(uint64_t i=0; i < type.getSubtypeCount(); ++i) {
-          subtype = &(type.getSubtype(i));
-          if (selectedColumns[static_cast<size_t>(subtype->getColumnId())]) {
-            unionResult->children.push_back(createRowBatch(*subtype,
-                                                          capacity).release());
-          }
-        }
-      }
-      break;
-    default:
-      throw NotImplementedYet("not supported yet");
-    }
-    return std::unique_ptr<ColumnVectorBatch>(result);
-  }
-
-  std::unique_ptr<ColumnVectorBatch> ReaderImpl::createRowBatch
                                               (uint64_t capacity) const {
-    return createRowBatch(*(schema.get()), capacity);
+    return getSelectedType().createRowBatch(capacity, memoryPool);
   }
 
   void ensureOrcFooter(InputStream* stream,
@@ -2045,4 +1984,43 @@ namespace orc {
     }
   }
 
+  void ReaderImpl::updateSelected(const std::list<uint64_t>& fieldIds) {
+    uint64_t childCount = schema->getSubtypeCount();
+    for(std::list<uint64_t>::const_iterator i = fieldIds.begin();
+        i != fieldIds.end(); ++i) {
+      if (*i >= childCount) {
+        std::stringstream buffer;
+        buffer << "Invalid column selected " << *i << " out of "
+               << childCount;
+        throw ParseError(buffer.str());
+      }
+      const Type& child = *schema->getSubtype(*i);
+      for(size_t c = child.getColumnId();
+          c <= child.getMaximumColumnId(); ++c){
+        selectedColumns[c] = true;
+      }
+    }
+  }
+
+  void ReaderImpl::updateSelected(const std::list<std::string>& fieldNames) {
+    uint64_t childCount = schema->getSubtypeCount();
+    for(std::list<std::string>::const_iterator i = fieldNames.begin();
+        i != fieldNames.end(); ++i) {
+      bool foundMatch = false;
+      for(size_t field=0; field < childCount; ++field) {
+        if (schema->getFieldName(field) == *i) {
+          const Type& child = *schema->getSubtype(field);
+          for(size_t c = child.getColumnId();
+              c <= child.getMaximumColumnId(); ++c){
+            selectedColumns[c] = true;
+          }
+          foundMatch = true;
+          break;
+        }
+      }
+      if (!foundMatch) {
+        throw ParseError("Invalid column selected " + *i);
+      }
+    }
+  }
 }// namespace

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/src/TypeImpl.cc
----------------------------------------------------------------------
diff --git a/c++/src/TypeImpl.cc b/c++/src/TypeImpl.cc
index 4d37d27..d3507b0 100644
--- a/c++/src/TypeImpl.cc
+++ b/c++/src/TypeImpl.cc
@@ -30,7 +30,9 @@ namespace orc {
   }
 
   TypeImpl::TypeImpl(TypeKind _kind) {
-    columnId = 0;
+    parent = nullptr;
+    columnId = -1;
+    maximumColumnId = -1;
     kind = _kind;
     maxLength = 0;
     precision = 0;
@@ -39,7 +41,9 @@ namespace orc {
   }
 
   TypeImpl::TypeImpl(TypeKind _kind, uint64_t _maxLength) {
-    columnId = 0;
+    parent = nullptr;
+    columnId = -1;
+    maximumColumnId = -1;
     kind = _kind;
     maxLength = _maxLength;
     precision = 0;
@@ -49,7 +53,9 @@ namespace orc {
 
   TypeImpl::TypeImpl(TypeKind _kind, uint64_t _precision,
                      uint64_t _scale) {
-    columnId = 0;
+    parent = nullptr;
+    columnId = -1;
+    maximumColumnId = -1;
     kind = _kind;
     maxLength = 0;
     precision = _precision;
@@ -57,35 +63,13 @@ namespace orc {
     subtypeCount = 0;
   }
 
-  TypeImpl::TypeImpl(TypeKind _kind,
-                     const std::vector<Type*>& types,
-                     const std::vector<std::string>& _fieldNames) {
-    columnId = 0;
-    kind = _kind;
-    maxLength = 0;
-    precision = 0;
-    scale = 0;
-    subtypeCount = static_cast<uint64_t>(types.size());
-    subTypes.assign(types.begin(), types.end());
-    fieldNames.assign(_fieldNames.begin(), _fieldNames.end());
-  }
-
-  TypeImpl::TypeImpl(TypeKind _kind, const std::vector<Type*>& types) {
-    columnId = 0;
-    kind = _kind;
-    maxLength = 0;
-    precision = 0;
-    scale = 0;
-    subtypeCount = static_cast<uint64_t>(types.size());
-    subTypes.assign(types.begin(), types.end());
-  }
-
-  int64_t TypeImpl::assignIds(int64_t root) {
-    columnId = root;
-    int64_t current = root + 1;
+  uint64_t TypeImpl::assignIds(uint64_t root) const {
+    columnId = static_cast<int64_t>(root);
+    uint64_t current = root + 1;
     for(uint64_t i=0; i < subtypeCount; ++i) {
-      current = subTypes[i]->assignIds(current);
+      current = dynamic_cast<TypeImpl*>(subTypes[i])->assignIds(current);
     }
+    maximumColumnId = static_cast<int64_t>(current) - 1;
     return current;
   }
 
@@ -96,8 +80,24 @@ namespace orc {
     }
   }
 
-  int64_t TypeImpl::getColumnId() const {
-    return columnId;
+  void TypeImpl::ensureIdAssigned() const {
+    if (columnId == -1) {
+      const TypeImpl* root = this;
+      while (root->parent != nullptr) {
+        root = root->parent;
+      }
+      root->assignIds(0);
+    }
+  }
+
+  uint64_t TypeImpl::getColumnId() const {
+    ensureIdAssigned();
+    return static_cast<uint64_t>(columnId);
+  }
+
+  uint64_t TypeImpl::getMaximumColumnId() const {
+    ensureIdAssigned();
+    return static_cast<uint64_t>(maximumColumnId);
   }
 
   TypeKind TypeImpl::getKind() const {
@@ -108,8 +108,8 @@ namespace orc {
     return subtypeCount;
   }
 
-  const Type& TypeImpl::getSubtype(uint64_t i) const {
-    return *(subTypes[i]);
+  const Type* TypeImpl::getSubtype(uint64_t i) const {
+    return subTypes[i];
   }
 
   const std::string& TypeImpl::getFieldName(uint64_t i) const {
@@ -128,13 +128,30 @@ namespace orc {
     return scale;
   }
 
-  Type& TypeImpl::addStructField(std::unique_ptr<Type> fieldType,
-                                 const std::string& fieldName) {
-    Type* result = fieldType.release();
-    subTypes.push_back(result);
-    fieldNames.push_back(fieldName);
+  void TypeImpl::setIds(uint64_t _columnId, uint64_t _maxColumnId) {
+    columnId = static_cast<int64_t>(_columnId);
+    maximumColumnId = static_cast<int64_t>(_maxColumnId);
+  }
+
+  void TypeImpl::addChildType(std::unique_ptr<Type> childType) {
+    TypeImpl* child = dynamic_cast<TypeImpl*>(childType.release());
+    subTypes.push_back(child);
+    if (child != nullptr) {
+      child->parent = this;
+    }
     subtypeCount += 1;
-    return *result;
+  }
+
+  Type* TypeImpl::addStructField(const std::string& fieldName,
+                                 std::unique_ptr<Type> fieldType) {
+    addChildType(std::move(fieldType));
+    fieldNames.push_back(fieldName);
+    return this;
+  }
+
+  Type* TypeImpl::addUnionChild(std::unique_ptr<Type> fieldType) {
+    addChildType(std::move(fieldType));
+    return this;
   }
 
   std::string TypeImpl::toString() const {
@@ -160,10 +177,10 @@ namespace orc {
     case TIMESTAMP:
       return "timestamp";
     case LIST:
-      return "array<" + subTypes[0]->toString() + ">";
+      return "array<" + (subTypes[0] ? subTypes[0]->toString() : "void") + ">";
     case MAP:
-      return "map<" + subTypes[0]->toString() + "," +
-        subTypes[1]->toString() +  ">";
+      return "map<" + (subTypes[0] ? subTypes[0]->toString() : "void") + "," +
+        (subTypes[1] ? subTypes[1]->toString() : "void") +  ">";
     case STRUCT: {
       std::string result = "struct<";
       for(size_t i=0; i < subTypes.size(); ++i) {
@@ -210,6 +227,89 @@ namespace orc {
     }
   }
 
+  std::unique_ptr<ColumnVectorBatch>
+  TypeImpl::createRowBatch(uint64_t capacity,
+                           MemoryPool& memoryPool) const {
+    switch (static_cast<int64_t>(kind)) {
+    case BOOLEAN:
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+    case DATE:
+      return std::unique_ptr<ColumnVectorBatch>
+        (new LongVectorBatch(capacity, memoryPool));
+
+    case FLOAT:
+    case DOUBLE:
+      return std::unique_ptr<ColumnVectorBatch>
+        (new DoubleVectorBatch(capacity, memoryPool));
+
+    case STRING:
+    case BINARY:
+    case CHAR:
+    case VARCHAR:
+      return std::unique_ptr<ColumnVectorBatch>
+        (new StringVectorBatch(capacity, memoryPool));
+
+    case TIMESTAMP:
+      return std::unique_ptr<ColumnVectorBatch>
+        (new TimestampVectorBatch(capacity, memoryPool));
+
+    case STRUCT: {
+      StructVectorBatch *result = new StructVectorBatch(capacity, memoryPool);
+      for(uint64_t i=0; i < getSubtypeCount(); ++i) {
+          result->fields.push_back(getSubtype(i)->
+                                   createRowBatch(capacity,
+                                                  memoryPool).release());
+      }
+      return std::unique_ptr<ColumnVectorBatch>(result);
+    }
+
+    case LIST: {
+      ListVectorBatch* result = new ListVectorBatch(capacity, memoryPool);
+      if (getSubtype(0) != nullptr) {
+        result->elements = getSubtype(0)->createRowBatch(capacity, memoryPool);
+      }
+      return std::unique_ptr<ColumnVectorBatch>(result);
+    }
+
+    case MAP: {
+      MapVectorBatch* result = new MapVectorBatch(capacity, memoryPool);
+      if (getSubtype(0) != nullptr) {
+        result->keys = getSubtype(0)->createRowBatch(capacity, memoryPool);
+      }
+      if (getSubtype(1) != nullptr) {
+        result->elements = getSubtype(1)->createRowBatch(capacity, memoryPool);
+      }
+      return std::unique_ptr<ColumnVectorBatch>(result);
+    }
+
+    case DECIMAL: {
+      if (getPrecision() == 0 || getPrecision() > 18) {
+        return std::unique_ptr<ColumnVectorBatch>
+          (new Decimal128VectorBatch(capacity, memoryPool));
+      } else {
+        return std::unique_ptr<ColumnVectorBatch>
+          (new Decimal64VectorBatch(capacity, memoryPool));
+      }
+    }
+
+    case UNION: {
+      UnionVectorBatch *result = new UnionVectorBatch(capacity, memoryPool);
+      for(uint64_t i=0; i < getSubtypeCount(); ++i) {
+          result->children.push_back(getSubtype(i)->createRowBatch(capacity,
+                                                                   memoryPool)
+                                     .release());
+      }
+      return std::unique_ptr<ColumnVectorBatch>(result);
+    }
+
+    default:
+      throw NotImplementedYet("not supported yet");
+    }
+  }
+
   std::unique_ptr<Type> createPrimitiveType(TypeKind kind) {
     return std::unique_ptr<Type>(new TypeImpl(kind));
   }
@@ -228,55 +328,22 @@ namespace orc {
     return std::unique_ptr<Type>(new TypeImpl(STRUCT));
   }
 
-  std::unique_ptr<Type>
-      createStructType(std::vector<Type*> types,
-                       std::vector<std::string> fieldNames) {
-    std::vector<Type*> typeVector(types.begin(), types.end());
-    std::vector<std::string> fieldVector(fieldNames.begin(), fieldNames.end());
-
-    return std::unique_ptr<Type>(new TypeImpl(STRUCT, typeVector,
-                                              fieldVector));
-  }
-
-#ifdef ORC_CXX_HAS_INITIALIZER_LIST
-  std::unique_ptr<Type> createStructType(
-      std::initializer_list<std::unique_ptr<Type> > types,
-      std::initializer_list<std::string> fieldNames) {
-    std::vector<Type*> typeVector(types.size());
-    std::vector<std::string> fieldVector(types.size());
-    auto currentType = types.begin();
-    auto endType = types.end();
-    size_t current = 0;
-    while (currentType != endType) {
-      typeVector[current++] =
-          const_cast<std::unique_ptr<Type>*>(currentType)->release();
-      ++currentType;
-    }
-    fieldVector.insert(fieldVector.end(), fieldNames.begin(),
-        fieldNames.end());
-    return std::unique_ptr<Type>(new TypeImpl(STRUCT, typeVector,
-        fieldVector));
-  }
-#endif
-
   std::unique_ptr<Type> createListType(std::unique_ptr<Type> elements) {
-    std::vector<Type*> subtypes(1);
-    subtypes[0] = elements.release();
-    return std::unique_ptr<Type>(new TypeImpl(LIST, subtypes));
+    TypeImpl* result = new TypeImpl(LIST);
+    result->addChildType(std::move(elements));
+    return std::unique_ptr<Type>(result);
   }
 
   std::unique_ptr<Type> createMapType(std::unique_ptr<Type> key,
                                       std::unique_ptr<Type> value) {
-    std::vector<Type*> subtypes(2);
-    subtypes[0] = key.release();
-    subtypes[1] = value.release();
-    return std::unique_ptr<Type>(new TypeImpl(MAP, subtypes));
+    TypeImpl* result = new TypeImpl(MAP);
+    result->addChildType(std::move(key));
+    result->addChildType(std::move(value));
+    return std::unique_ptr<Type>(result);
   }
 
-  std::unique_ptr<Type>
-      createUnionType(std::vector<Type*> types) {
-    std::vector<Type*> typeVector(types.begin(), types.end());
-    return std::unique_ptr<Type>(new TypeImpl(UNION, typeVector));
+  std::unique_ptr<Type> createUnionType() {
+    return std::unique_ptr<Type>(new TypeImpl(UNION));
   }
 
   std::string printProtobufMessage(const google::protobuf::Message& message);
@@ -311,59 +378,117 @@ namespace orc {
     case proto::Type_Kind_LIST:
     case proto::Type_Kind_MAP:
     case proto::Type_Kind_UNION: {
-      uint64_t size = static_cast<uint64_t>(type.subtypes_size());
-      std::vector<Type*> typeList(size);
+      TypeImpl* result = new TypeImpl(static_cast<TypeKind>(type.kind()));
       for(int i=0; i < type.subtypes_size(); ++i) {
-        typeList[static_cast<uint64_t>(i)] =
-          convertType(footer.types(static_cast<int>(type.subtypes(i))),
-                      footer).release();
+        result->addUnionChild(convertType(footer.types(static_cast<int>
+                                                       (type.subtypes(i))),
+                                          footer));
       }
-      return std::unique_ptr<Type>
-        (new TypeImpl(static_cast<TypeKind>(type.kind()), typeList));
+      return std::unique_ptr<Type>(result);
     }
 
     case proto::Type_Kind_STRUCT: {
+      TypeImpl* result = new TypeImpl(STRUCT);
       uint64_t size = static_cast<uint64_t>(type.subtypes_size());
       std::vector<Type*> typeList(size);
       std::vector<std::string> fieldList(size);
       for(int i=0; i < type.subtypes_size(); ++i) {
-        typeList[static_cast<uint64_t>(i)] =
-          convertType(footer.types(static_cast<int>(type.subtypes(i))),
-                      footer).release();
-        fieldList[static_cast<uint64_t>(i)] = type.fieldnames(i);
+        result->addStructField(type.fieldnames(i),
+                               convertType(footer.types(static_cast<int>
+                                                        (type.subtypes(i))),
+                                           footer));
       }
-      return std::unique_ptr<Type>
-        (new TypeImpl(STRUCT, typeList, fieldList));
+      return std::unique_ptr<Type>(result);
     }
     default:
       throw NotImplementedYet("Unknown type kind");
     }
   }
 
-  std::string kind2String(TypeKind t) {
-      std::string name ;
-      switch(static_cast<int64_t>(t)) {
-        case BOOLEAN: { name = "BOOLEAN"; break; }
-        case BYTE: { name = "TINYINT"; break; }
-        case SHORT: { name = "SMALLINT"; break; }
-        case INT: { name = "INT"; break; }
-        case LONG: { name = "BIGINT"; break; }
-        case FLOAT: { name = "FLOAT"; break; }
-        case DOUBLE: { name = "DOUBLE"; break; }
-        case STRING: { name = "STRING"; break; }
-        case BINARY: { name = "BINARY"; break; }
-        case TIMESTAMP: { name = "TIMESTAMP"; break; }
-        case LIST: { name = "LIST"; break; }
-        case MAP: { name = "MAP"; break; }
-        case STRUCT: { name = "STRUCT"; break; }
-        case UNION: { name = "UNION"; break; }
-        case DECIMAL: { name = "DECIMAL"; break; }
-        case DATE: { name = "DATE"; break; }
-        case VARCHAR: { name = "VARCHAR"; break; }
-        case CHAR: { name = "CHAR"; break; }
-        default: { name = "UNKNOWN"; break; }
+  /**
+   * Build a clone of the file type, projecting columns from the selected
+   * vector. This routine assumes that the parent of any selected column
+   * is also selected. The column ids are copied from the fileType.
+   * @param fileType the type in the file
+   * @param selected for each column id, whether that column is selected
+   * @return a clone of the fileType filtered by the selection array
+   */
+  std::unique_ptr<Type> buildSelectedType(const Type *fileType,
+                                          const std::vector<bool>& selected) {
+    if (fileType == nullptr || !selected[fileType->getColumnId()]) {
+      return nullptr;
+    }
+
+    TypeImpl* result;
+    switch (fileType->getKind()) {
+    case BOOLEAN:
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+    case FLOAT:
+    case DOUBLE:
+    case STRING:
+    case BINARY:
+    case TIMESTAMP:
+    case DATE:
+      result = new TypeImpl(fileType->getKind());
+      break;
+
+    case DECIMAL:
+      result= new TypeImpl(fileType->getKind(),
+                           fileType->getPrecision(), fileType->getScale());
+      break;
+
+    case VARCHAR:
+    case CHAR:
+      result = new TypeImpl(fileType->getKind(), fileType->getMaximumLength());
+      break;
+
+    case LIST:
+      result = new TypeImpl(fileType->getKind());
+      result->addChildType(buildSelectedType(fileType->getSubtype(0),
+                                             selected));
+      break;
+
+    case MAP:
+      result = new TypeImpl(fileType->getKind());
+      result->addChildType(buildSelectedType(fileType->getSubtype(0),
+                                             selected));
+      result->addChildType(buildSelectedType(fileType->getSubtype(1),
+                                             selected));
+      break;
+
+    case STRUCT: {
+      result = new TypeImpl(fileType->getKind());
+      for(uint64_t child=0; child < fileType->getSubtypeCount(); ++child) {
+        std::unique_ptr<Type> childType =
+          buildSelectedType(fileType->getSubtype(child), selected);
+        if (childType.get() != nullptr) {
+          result->addStructField(fileType->getFieldName(child),
+                                 std::move(childType));
+        }
       }
-      return name ;
+      break;
     }
 
+    case UNION: {
+      result = new TypeImpl(fileType->getKind());
+      for(uint64_t child=0; child < fileType->getSubtypeCount(); ++child) {
+        std::unique_ptr<Type> childType =
+          buildSelectedType(fileType->getSubtype(child), selected);
+        if (childType.get() != nullptr) {
+          result->addUnionChild(std::move(childType));
+        }
+      }
+      break;
+    }
+
+    default:
+      throw NotImplementedYet("Unknown type kind");
+    }
+    result->setIds(fileType->getColumnId(), fileType->getMaximumColumnId());
+    return std::unique_ptr<Type>(result);
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/src/TypeImpl.hh
----------------------------------------------------------------------
diff --git a/c++/src/TypeImpl.hh b/c++/src/TypeImpl.hh
index 756375f..e2866e4 100644
--- a/c++/src/TypeImpl.hh
+++ b/c++/src/TypeImpl.hh
@@ -19,7 +19,7 @@
 #ifndef TYPE_IMPL_HH
 #define TYPE_IMPL_HH
 
-#include "orc/Vector.hh"
+#include "orc/Type.hh"
 
 #include "Adaptor.hh"
 #include "wrap/orc-proto-wrapper.hh"
@@ -30,7 +30,9 @@ namespace orc {
 
   class TypeImpl: public Type {
   private:
-    int64_t columnId;
+    TypeImpl* parent;
+    mutable int64_t columnId;
+    mutable int64_t maximumColumnId;
     TypeKind kind;
     std::vector<Type*> subTypes;
     std::vector<std::string> fieldNames;
@@ -56,29 +58,17 @@ namespace orc {
     TypeImpl(TypeKind kind, uint64_t precision,
              uint64_t scale);
 
-    /**
-     * Create struct type.
-     */
-    TypeImpl(TypeKind kind,
-             const std::vector<Type*>& types,
-             const std::vector<std::string>& fieldNames);
-
-    /**
-     * Create list, map, and union type.
-     */
-    TypeImpl(TypeKind kind, const std::vector<Type*>& types);
-
     virtual ~TypeImpl();
 
-    int64_t assignIds(int64_t root) override;
+    uint64_t getColumnId() const override;
 
-    int64_t getColumnId() const override;
+    uint64_t getMaximumColumnId() const override;
 
     TypeKind getKind() const override;
 
     uint64_t getSubtypeCount() const override;
 
-    const Type& getSubtype(uint64_t i) const override;
+    const Type* getSubtype(uint64_t i) const override;
 
     const std::string& getFieldName(uint64_t i) const override;
 
@@ -90,12 +80,51 @@ namespace orc {
 
     std::string toString() const override;
 
-    Type& addStructField(std::unique_ptr<Type> fieldType,
-                         const std::string& fieldName) override;
+    Type* addStructField(const std::string& fieldName,
+                         std::unique_ptr<Type> fieldType) override;
+    Type* addUnionChild(std::unique_ptr<Type> fieldType) override;
+
+    std::unique_ptr<ColumnVectorBatch> createRowBatch(uint64_t size,
+                                                      MemoryPool& memoryPool
+                                                      ) const override;
+
+    /**
+     * Explicitly set the column ids. Only for internal usage.
+     */
+    void setIds(uint64_t columnId, uint64_t maxColumnId);
+
+    /**
+     * Add a child type.
+     */
+    void addChildType(std::unique_ptr<Type> childType);
+
+  private:
+    /**
+     * Assign ids to this node and its children giving this
+     * node rootId.
+     * @param rootId the column id that should be assigned to this node.
+     */
+    uint64_t assignIds(uint64_t rootId) const;
+
+    /**
+     * Ensure that ids are assigned to all of the nodes.
+     */
+    void ensureIdAssigned() const;
   };
 
   std::unique_ptr<Type> convertType(const proto::Type& type,
                                     const proto::Footer& footer);
+
+  /**
+   * Build a clone of the file type, projecting columns from the selected
+   * vector. This routine assumes that the parent of any selected column
+   * is also selected.
+   * @param fileType the type in the file
+   * @param selected for each column id, whether that column is selected
+   * @return a clone of the fileType filtered by the selection array
+   */
+  std::unique_ptr<Type> buildSelectedType(const Type *fileType,
+                                          const std::vector<bool>& selected);
 }
 
 #endif

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/test/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/c++/test/CMakeLists.txt b/c++/test/CMakeLists.txt
index 5a2105a..cd417c8 100644
--- a/c++/test/CMakeLists.txt
+++ b/c++/test/CMakeLists.txt
@@ -30,6 +30,7 @@ add_executable (test-orc
   TestDriver.cc
   TestInt128.cc
   TestRle.cc
+  TestType.cc
 )
 
 target_link_libraries (test-orc

http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/test/TestColumnPrinter.cc
----------------------------------------------------------------------
diff --git a/c++/test/TestColumnPrinter.cc b/c++/test/TestColumnPrinter.cc
index a2afdb3..8cc3a22 100644
--- a/c++/test/TestColumnPrinter.cc
+++ b/c++/test/TestColumnPrinter.cc
@@ -27,7 +27,7 @@ namespace orc {
     std::string line;
     std::unique_ptr<Type> type = createPrimitiveType(BOOLEAN);
     std::unique_ptr<ColumnPrinter> printer =
-      createColumnPrinter(line, *type);
+      createColumnPrinter(line, type.get());
     LongVectorBatch batch(1024, *getDefaultPool());
     const char *expected[] = {"true", "false", "true"};
     batch.numElements = 3;
@@ -60,7 +60,8 @@ namespace orc {
   TEST(TestColumnPrinter, LongColumnPrinter) {
     std::string line;
     std::unique_ptr<Type> type = createPrimitiveType(LONG);
-    std::unique_ptr<ColumnPrinter> printer = createColumnPrinter(line, *type);
+    std::unique_ptr<ColumnPrinter> printer =
+      createColumnPrinter(line, type.get());
     LongVectorBatch batch(1024, *getDefaultPool());
     batch.numElements = 2;
     batch.hasNulls = false;
@@ -94,7 +95,7 @@ namespace orc {
     std::string line;
     std::unique_ptr<Type> type = createPrimitiveType(DOUBLE);
     std::unique_ptr<ColumnPrinter> printer =
-      createColumnPrinter(line, *type);
+      createColumnPrinter(line, type.get());
     DoubleVectorBatch batch(1024, *getDefaultPool());
     batch.numElements = 2;
     batch.hasNulls = false;
@@ -127,7 +128,8 @@ namespace orc {
   TEST(TestColumnPrinter, TimestampColumnPrinter) {
     std::string line;
     std::unique_ptr<Type> type = createPrimitiveType(TIMESTAMP);
-    std::unique_ptr<ColumnPrinter> printer = createColumnPrinter(line, *type);
+    std::unique_ptr<ColumnPrinter> printer =
+      createColumnPrinter(line, type.get());
     TimestampVectorBatch batch(1024, *getDefaultPool());
     batch.numElements = 12;
     batch.hasNulls = false;
@@ -193,7 +195,7 @@ namespace orc {
     std::string line;
     std::unique_ptr<Type> type = createPrimitiveType(DATE);
     std::unique_ptr<ColumnPrinter> printer =
-      createColumnPrinter(line, *type);
+      createColumnPrinter(line, type.get());
     LongVectorBatch batch(1024, *getDefaultPool());
     batch.numElements = 10;
     batch.hasNulls = false;
@@ -243,7 +245,7 @@ namespace orc {
     std::string line;
     std::unique_ptr<Type> type = createDecimalType(16, 5);
     std::unique_ptr<ColumnPrinter> printer =
-      createColumnPrinter(line, *type);
+      createColumnPrinter(line, type.get());
     Decimal64VectorBatch batch(1024, *getDefaultPool());
     batch.numElements = 10;
     batch.hasNulls = false;
@@ -294,7 +296,7 @@ namespace orc {
     std::string line;
     std::unique_ptr<Type> type = createDecimalType(30, 5);
     std::unique_ptr<ColumnPrinter> printer =
-      createColumnPrinter(line, *type);
+      createColumnPrinter(line, type.get());
     Decimal128VectorBatch batch(1024, *getDefaultPool());
     batch.numElements = 10;
     batch.hasNulls = false;
@@ -345,7 +347,7 @@ namespace orc {
     std::string line;
     std::unique_ptr<Type> type = createPrimitiveType(STRING);
     std::unique_ptr<ColumnPrinter> printer =
-      createColumnPrinter(line, *type);
+      createColumnPrinter(line, type.get());
     StringVectorBatch batch(1024, *getDefaultPool());
     const char *blob= "thisisatest\b\f\n\r\t\\\"'";
     batch.numElements = 5;
@@ -388,7 +390,7 @@ namespace orc {
     std::string line;
     std::unique_ptr<Type> type = createPrimitiveType(BINARY);
     std::unique_ptr<ColumnPrinter> printer =
-      createColumnPrinter(line, *type);
+      createColumnPrinter(line, type.get());
     StringVectorBatch batch(1024, *getDefaultPool());
     char blob[45];
     for(size_t i=0; i < sizeof(blob); ++i) {
@@ -438,7 +440,7 @@ namespace orc {
     std::string line;
     std::unique_ptr<Type> type = createListType(createPrimitiveType(LONG));
     std::unique_ptr<ColumnPrinter> printer =
-      createColumnPrinter(line, *type);
+      createColumnPrinter(line, type.get());
     ListVectorBatch batch(1024, *getDefaultPool());
     LongVectorBatch* longBatch = new LongVectorBatch(1024, *getDefaultPool());
     batch.elements = std::unique_ptr<ColumnVectorBatch>(longBatch);
@@ -490,7 +492,7 @@ namespace orc {
     std::unique_ptr<Type> type = createMapType(createPrimitiveType(LONG),
                                                createPrimitiveType(LONG));
     std::unique_ptr<ColumnPrinter> printer =
-      createColumnPrinter(line, *type);
+      createColumnPrinter(line, type.get());
     MapVectorBatch batch(1024, *getDefaultPool());
     LongVectorBatch* keyBatch = new LongVectorBatch(1024, *getDefaultPool());
     LongVectorBatch* valueBatch = new LongVectorBatch(1024, *getDefaultPool());
@@ -541,15 +543,11 @@ namespace orc {
 
   TEST(TestColumnPrinter, StructColumnPrinter) {
     std::string line;
-    std::vector<std::string> fieldNames;
-    std::vector<Type*> subtypes;
-    fieldNames.push_back("first");
-    fieldNames.push_back("second");
-    subtypes.push_back(createPrimitiveType(LONG).release());
-    subtypes.push_back(createPrimitiveType(LONG).release());
-    std::unique_ptr<Type> type = createStructType(subtypes, fieldNames);
+    std::unique_ptr<Type> type = createStructType();
+    type->addStructField("first", createPrimitiveType(LONG));
+    type->addStructField("second", createPrimitiveType(LONG));
     std::unique_ptr<ColumnPrinter> printer =
-      createColumnPrinter(line, *type);
+      createColumnPrinter(line, type.get());
     StructVectorBatch batch(1024, *getDefaultPool());
     LongVectorBatch* firstBatch = new LongVectorBatch(1024, *getDefaultPool());
     LongVectorBatch* secondBatch =