You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2015/10/10 20:08:56 UTC
orc git commit: ORC-28. Fix logical column selection to correctly match physical columns.
Repository: orc
Updated Branches:
refs/heads/master bec0d82fd -> eecd70141
ORC-28. Fix logical column selection to correctly match physical columns.
closes apache/orc#8
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/eecd7014
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/eecd7014
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/eecd7014
Branch: refs/heads/master
Commit: eecd7014186401ff166cfc42bad3eb0f1453aae9
Parents: bec0d82
Author: Aliaksei Sandryhaila <al...@hp.com>
Authored: Thu Sep 24 06:34:55 2015 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Sat Oct 10 11:07:18 2015 -0700
----------------------------------------------------------------------
c++/src/Reader.cc | 50 +++++++++++--------------------
tools/test/TestReader.cc | 70 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 87 insertions(+), 33 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/eecd7014/c++/src/Reader.cc
----------------------------------------------------------------------
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index cb0647d..c22996b 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -895,8 +895,7 @@ namespace orc {
proto::StripeFooter getStripeFooter(const proto::StripeInformation& info);
void startNextStripe();
void checkOrcVersion();
- void selectTypeParent(size_t columnId);
- void selectTypeChildren(size_t columnId);
+ void selectType(const Type& type);
void readMetadata() const;
std::unique_ptr<ColumnVectorBatch> createRowBatch(const Type& type,
uint64_t capacity
@@ -1069,9 +1068,22 @@ namespace orc {
const std::list<int64_t>& included = options.getInclude();
for(std::list<int64_t>::const_iterator columnId = included.begin();
columnId != included.end(); ++columnId) {
- if (*columnId <= static_cast<int64_t>(schema->getSubtypeCount())) {
- selectTypeParent(static_cast<size_t>(*columnId));
- selectTypeChildren(static_cast<size_t>(*columnId));
+ if (*columnId == 0) {
+ selectType(*(schema.get()));
+ } else if (*columnId <= static_cast<int64_t>(schema->getSubtypeCount())) {
+ selectType(schema->getSubtype(*columnId-1));
+ }
+ }
+ if (included.size() > 0) {
+ selectedColumns[0] = true;
+ }
+ }
+
+ void ReaderImpl::selectType(const Type& type) {
+ if (!selectedColumns[type.getColumnId()]) {
+ selectedColumns[type.getColumnId()] = true;
+ for (uint64_t i=0; i < type.getSubtypeCount(); i++) {
+ selectType(type.getSubtype(i));
}
}
}
@@ -1184,34 +1196,6 @@ namespace orc {
return false;
}
- void ReaderImpl::selectTypeParent(size_t columnId) {
- for(size_t parent=0; parent < columnId; ++parent) {
- const proto::Type& parentType = footer->types(static_cast<int>(parent));
- for(int idx=0; idx < parentType.subtypes_size(); ++idx) {
- uint64_t child = parentType.subtypes(idx);
- if (child == columnId) {
- if (!selectedColumns[parent]) {
- selectedColumns[parent] = true;
- selectTypeParent(parent);
- return;
- }
- }
- }
- }
- }
-
- void ReaderImpl::selectTypeChildren(size_t columnId) {
- if (!selectedColumns[columnId]) {
- selectedColumns[columnId] = true;
- const proto::Type& parentType =
- footer->types(static_cast<int>(columnId));
- for(int idx=0; idx < parentType.subtypes_size(); ++idx) {
- uint64_t child = parentType.subtypes(idx);
- selectTypeChildren(child);
- }
- }
- }
-
const std::vector<bool> ReaderImpl::getSelectedColumns() const {
return selectedColumns;
}
http://git-wip-us.apache.org/repos/asf/orc/blob/eecd7014/tools/test/TestReader.cc
----------------------------------------------------------------------
diff --git a/tools/test/TestReader.cc b/tools/test/TestReader.cc
index ebc326f..92fa10a 100644
--- a/tools/test/TestReader.cc
+++ b/tools/test/TestReader.cc
@@ -907,6 +907,76 @@ TEST(Reader, futureFormatVersion) {
EXPECT_EQ("19.99", reader->getFormatVersion());
}
+TEST(Reader, selectColumns) {
+ orc::ReaderOptions opts;
+ std::ostringstream filename;
+ filename << exampleDirectory << "/TestOrcFile.testSeek.orc";
+ std::list<int64_t> cols;
+
+ // All columns
+ cols.push_back(0);
+ opts.include(cols);
+ std::unique_ptr<orc::Reader> reader =
+ orc::createReader(orc::readLocalFile(filename.str()), opts);
+ std::vector<bool> c = reader->getSelectedColumns();
+ EXPECT_EQ(24, c.size());
+ for (unsigned int i=0; i < c.size(); i++) {
+ EXPECT_TRUE(c[i]);
+ }
+
+ // Int column #2
+ cols.clear();
+ cols.push_back(2);
+ opts.include(cols);
+ reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
+ c = reader->getSelectedColumns();
+ for (unsigned int i=1; i < c.size(); i++) {
+ if (i==2)
+ EXPECT_TRUE(c[i]);
+ else
+ EXPECT_TRUE(!c[i]);
+ }
+
+ // Struct column #10
+ cols.clear();
+ cols.push_back(10);
+ opts.include(cols);
+ reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
+ c = reader->getSelectedColumns();
+ for (unsigned int i=1; i < c.size(); i++) {
+ if (i>=10 && i<=14)
+ EXPECT_TRUE(c[i]);
+ else
+ EXPECT_TRUE(!c[i]);
+ }
+
+ // Array column #11
+ cols.clear();
+ cols.push_back(11);
+ opts.include(cols);
+ reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
+ c = reader->getSelectedColumns();
+ for (unsigned int i=1; i < c.size(); i++) {
+ if (i>=15 && i<=18)
+ EXPECT_TRUE(c[i]);
+ else
+ EXPECT_TRUE(!c[i]);
+ }
+
+ // Map column #12
+ cols.clear();
+ cols.push_back(12);
+ opts.include(cols);
+ reader = orc::createReader(orc::readLocalFile(filename.str()), opts);
+ c = reader->getSelectedColumns();
+ for (unsigned int i=1; i < c.size(); i++) {
+ if (i>=19 && i<=23)
+ EXPECT_TRUE(c[i]);
+ else
+ EXPECT_TRUE(!c[i]);
+ }
+}
+
std::map<std::string, std::string> makeMetadata() {
std::map<std::string, std::string> result;
result["my.meta"] = "\x01\x02\x03\x04\x05\x06\x07\xff\xfe\x7f\x80";