You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2015/10/10 20:08:56 UTC

orc git commit: ORC-28. Fix logical column selection to correctly match physical columns.

Repository: orc
Updated Branches:
  refs/heads/master bec0d82fd -> eecd70141


ORC-28. Fix logical column selection to correctly match physical columns.

closes apache/orc#8


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/eecd7014
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/eecd7014
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/eecd7014

Branch: refs/heads/master
Commit: eecd7014186401ff166cfc42bad3eb0f1453aae9
Parents: bec0d82
Author: Aliaksei Sandryhaila <al...@hp.com>
Authored: Thu Sep 24 06:34:55 2015 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Sat Oct 10 11:07:18 2015 -0700

----------------------------------------------------------------------
 c++/src/Reader.cc        | 50 +++++++++++--------------------
 tools/test/TestReader.cc | 70 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/eecd7014/c++/src/Reader.cc
----------------------------------------------------------------------
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index cb0647d..c22996b 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -895,8 +895,7 @@ namespace orc {
     proto::StripeFooter getStripeFooter(const proto::StripeInformation& info);
     void startNextStripe();
     void checkOrcVersion();
-    void selectTypeParent(size_t columnId);
-    void selectTypeChildren(size_t columnId);
+    void selectType(const Type& type);
     void readMetadata() const;
     std::unique_ptr<ColumnVectorBatch> createRowBatch(const Type& type,
                                                       uint64_t capacity
@@ -1069,9 +1068,22 @@ namespace orc {
     const std::list<int64_t>& included = options.getInclude();
     for(std::list<int64_t>::const_iterator columnId = included.begin();
         columnId != included.end(); ++columnId) {
-      if (*columnId <= static_cast<int64_t>(schema->getSubtypeCount())) {
-        selectTypeParent(static_cast<size_t>(*columnId));
-        selectTypeChildren(static_cast<size_t>(*columnId));
+      if (*columnId == 0) {
+        selectType(*(schema.get()));
+      } else if (*columnId <= static_cast<int64_t>(schema->getSubtypeCount())) {
+        selectType(schema->getSubtype(*columnId-1));
+      }
+    }
+    if (included.size() > 0) {
+      selectedColumns[0] = true;
+    }
+  }
+
+  void ReaderImpl::selectType(const Type& type) {
+    if (!selectedColumns[type.getColumnId()]) {
+      selectedColumns[type.getColumnId()] = true;
+      for (uint64_t i=0; i < type.getSubtypeCount(); i++) {
+        selectType(type.getSubtype(i));
       }
     }
   }
@@ -1184,34 +1196,6 @@ namespace orc {
     return false;
   }
 
-  void ReaderImpl::selectTypeParent(size_t columnId) {
-    for(size_t parent=0; parent < columnId; ++parent) {
-      const proto::Type& parentType = footer->types(static_cast<int>(parent));
-      for(int idx=0; idx < parentType.subtypes_size(); ++idx) {
-        uint64_t child = parentType.subtypes(idx);
-        if (child == columnId) {
-          if (!selectedColumns[parent]) {
-            selectedColumns[parent] = true;
-            selectTypeParent(parent);
-            return;
-          }
-        }
-      }
-    }
-  }
-
-  void ReaderImpl::selectTypeChildren(size_t columnId) {
-    if (!selectedColumns[columnId]) {
-      selectedColumns[columnId] = true;
-      const proto::Type& parentType =
-        footer->types(static_cast<int>(columnId));
-      for(int idx=0; idx < parentType.subtypes_size(); ++idx) {
-        uint64_t child = parentType.subtypes(idx);
-        selectTypeChildren(child);
-      }
-    }
-  }
-
   const std::vector<bool> ReaderImpl::getSelectedColumns() const {
     return selectedColumns;
   }

http://git-wip-us.apache.org/repos/asf/orc/blob/eecd7014/tools/test/TestReader.cc
----------------------------------------------------------------------
diff --git a/tools/test/TestReader.cc b/tools/test/TestReader.cc
index ebc326f..92fa10a 100644
--- a/tools/test/TestReader.cc
+++ b/tools/test/TestReader.cc
@@ -907,6 +907,76 @@ TEST(Reader, futureFormatVersion) {
   EXPECT_EQ("19.99", reader->getFormatVersion());
 }
 
+TEST(Reader, selectColumns) {
+    orc::ReaderOptions opts;
+    std::ostringstream filename;
+    filename << exampleDirectory << "/TestOrcFile.testSeek.orc";
+    std::list<int64_t> cols;
+
+    // All columns
+    cols.push_back(0);
+    opts.include(cols);
+    std::unique_ptr<orc::Reader> reader =
+        orc::createReader(orc::readLocalFile(filename.str()), opts);
+    std::vector<bool> c = reader->getSelectedColumns();
+    EXPECT_EQ(24, c.size());
+    for (unsigned int i=0; i < c.size(); i++) {
+      EXPECT_TRUE(c[i]);
+    }
+
+    // Int column #2
+    cols.clear();
+    cols.push_back(2);
+    opts.include(cols);
+    reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
+    c = reader->getSelectedColumns();
+    for (unsigned int i=1; i < c.size(); i++) {
+      if (i==2)
+        EXPECT_TRUE(c[i]);
+      else
+        EXPECT_TRUE(!c[i]);
+    }
+
+    // Struct column #10
+    cols.clear();
+    cols.push_back(10);
+    opts.include(cols);
+    reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
+    c = reader->getSelectedColumns();
+    for (unsigned int i=1; i < c.size(); i++) {
+      if (i>=10 && i<=14)
+        EXPECT_TRUE(c[i]);
+      else
+        EXPECT_TRUE(!c[i]);
+    }
+
+    // Array column #11
+    cols.clear();
+    cols.push_back(11);
+    opts.include(cols);
+    reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
+    c = reader->getSelectedColumns();
+    for (unsigned int i=1; i < c.size(); i++) {
+      if (i>=15 && i<=18)
+        EXPECT_TRUE(c[i]);
+      else
+        EXPECT_TRUE(!c[i]);
+    }
+
+    // Map column #12
+    cols.clear();
+    cols.push_back(12);
+    opts.include(cols);
+    reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
+    c = reader->getSelectedColumns();
+    for (unsigned int i=1; i < c.size(); i++) {
+      if (i>=19 && i<=23)
+        EXPECT_TRUE(c[i]);
+      else
+        EXPECT_TRUE(!c[i]);
+    }
+}
+
   std::map<std::string, std::string> makeMetadata() {
     std::map<std::string, std::string> result;
     result["my.meta"] = "\x01\x02\x03\x04\x05\x06\x07\xff\xfe\x7f\x80";