You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2016/11/29 00:49:52 UTC
[1/2] orc git commit: HIVE-14089. Complex type support in LLAP is broken. (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Repository: orc
Updated Branches:
refs/heads/master fb8aec28d -> c9a6dc1ac
HIVE-14089. Complex type support in LLAP is broken. (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Signed-off-by: Owen O'Malley <om...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/d09af8d9
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/d09af8d9
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/d09af8d9
Branch: refs/heads/master
Commit: d09af8d965a18921b86da991248a496317892d4a
Parents: fb8aec2
Author: Owen O'Malley <om...@apache.org>
Authored: Mon Nov 28 15:15:45 2016 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Mon Nov 28 15:15:45 2016 -0800
----------------------------------------------------------------------
.../orc/impl/ConvertTreeReaderFactory.java | 2 +-
.../org/apache/orc/impl/TreeReaderFactory.java | 102 +++++++++++++++----
java/pom.xml | 2 +-
java/storage-api/pom.xml | 2 +-
.../common/io/encoded/EncodedColumnBatch.java | 52 ++++++----
5 files changed, 114 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/d09af8d9/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index b414b90..ee9e68e 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -292,7 +292,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
}
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
// Pass-thru.
convertTreeReader.seek(index);
}
http://git-wip-us.apache.org/repos/asf/orc/blob/d09af8d9/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 1e1ae01..73fe28f 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -126,7 +126,7 @@ public class TreeReaderFactory {
}
}
- static IntegerReader createIntegerReader(OrcProto.ColumnEncoding.Kind kind,
+ protected static IntegerReader createIntegerReader(OrcProto.ColumnEncoding.Kind kind,
InStream in,
boolean signed,
Context context) throws IOException {
@@ -162,7 +162,7 @@ public class TreeReaderFactory {
* @param index the indexes loaded from the file
* @throws IOException
*/
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
seek(index[columnId]);
}
@@ -248,6 +248,10 @@ public class TreeReaderFactory {
public BitFieldReader getPresent() {
return present;
}
+
+ public int getColumnId() {
+ return columnId;
+ }
}
public static class NullTreeReader extends TreeReader {
@@ -309,7 +313,7 @@ public class TreeReaderFactory {
}
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
seek(index[columnId]);
}
@@ -360,7 +364,7 @@ public class TreeReaderFactory {
}
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
seek(index[columnId]);
}
@@ -427,7 +431,7 @@ public class TreeReaderFactory {
}
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
seek(index[columnId]);
}
@@ -494,7 +498,7 @@ public class TreeReaderFactory {
}
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
seek(index[columnId]);
}
@@ -562,7 +566,7 @@ public class TreeReaderFactory {
}
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
seek(index[columnId]);
}
@@ -616,7 +620,7 @@ public class TreeReaderFactory {
}
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
seek(index[columnId]);
}
@@ -710,7 +714,7 @@ public class TreeReaderFactory {
}
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
seek(index[columnId]);
}
@@ -821,7 +825,7 @@ public class TreeReaderFactory {
}
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
seek(index[columnId]);
}
@@ -950,7 +954,7 @@ public class TreeReaderFactory {
}
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
seek(index[columnId]);
}
@@ -1059,7 +1063,7 @@ public class TreeReaderFactory {
}
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
seek(index[columnId]);
}
@@ -1130,7 +1134,7 @@ public class TreeReaderFactory {
}
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
seek(index[columnId]);
}
@@ -1244,7 +1248,7 @@ public class TreeReaderFactory {
}
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
reader.seek(index);
}
@@ -1392,7 +1396,7 @@ public class TreeReaderFactory {
}
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
seek(index[columnId]);
}
@@ -1542,7 +1546,7 @@ public class TreeReaderFactory {
}
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
seek(index[columnId]);
}
@@ -1748,7 +1752,7 @@ public class TreeReaderFactory {
}
}
- protected static class StructTreeReader extends TreeReader {
+ public static class StructTreeReader extends TreeReader {
protected final TreeReader[] fields;
protected StructTreeReader(int columnId,
@@ -1764,8 +1768,23 @@ public class TreeReaderFactory {
}
}
+ public TreeReader[] getChildReaders() {
+ return fields;
+ }
+
+ protected StructTreeReader(int columnId, InStream present,
+ Context context,
+ OrcProto.ColumnEncoding encoding,
+ TreeReader[] childReaders) throws IOException {
+ super(columnId, present, context);
+ if (encoding != null) {
+ checkEncoding(encoding);
+ }
+ this.fields = childReaders;
+ }
+
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
super.seek(index);
for (TreeReader kid : fields) {
if (kid != null) {
@@ -1847,8 +1866,19 @@ public class TreeReaderFactory {
}
}
+ protected UnionTreeReader(int columnId, InStream present,
+ Context context,
+ OrcProto.ColumnEncoding encoding,
+ TreeReader[] childReaders) throws IOException {
+ super(columnId, present, context);
+ if (encoding != null) {
+ checkEncoding(encoding);
+ }
+ this.fields = childReaders;
+ }
+
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
super.seek(index);
tags.seek(index[columnId]);
for (TreeReader kid : fields) {
@@ -1917,8 +1947,22 @@ public class TreeReaderFactory {
elementReader = createTreeReader(elementType, context);
}
+ protected ListTreeReader(int columnId,
+ InStream present,
+ Context context,
+ InStream data,
+ OrcProto.ColumnEncoding encoding,
+ TreeReader elementReader) throws IOException {
+ super(columnId, present, context);
+ if (data != null && encoding != null) {
+ checkEncoding(encoding);
+ this.lengths = createIntegerReader(encoding.getKind(), data, false, context);
+ }
+ this.elementReader = elementReader;
+ }
+
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
super.seek(index);
lengths.seek(index[columnId]);
elementReader.seek(index);
@@ -1996,8 +2040,24 @@ public class TreeReaderFactory {
valueReader = createTreeReader(valueType, context);
}
+ protected MapTreeReader(int columnId,
+ InStream present,
+ Context context,
+ InStream data,
+ OrcProto.ColumnEncoding encoding,
+ TreeReader keyReader,
+ TreeReader valueReader) throws IOException {
+ super(columnId, present, context);
+ if (data != null && encoding != null) {
+ checkEncoding(encoding);
+ this.lengths = createIntegerReader(encoding.getKind(), data, false, context);
+ }
+ this.keyReader = keyReader;
+ this.valueReader = valueReader;
+ }
+
@Override
- void seek(PositionProvider[] index) throws IOException {
+ public void seek(PositionProvider[] index) throws IOException {
super.seek(index);
lengths.seek(index[columnId]);
keyReader.seek(index);
http://git-wip-us.apache.org/repos/asf/orc/blob/d09af8d9/java/pom.xml
----------------------------------------------------------------------
diff --git a/java/pom.xml b/java/pom.xml
index 338c2ba..09b45ae 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -70,7 +70,7 @@
<test.tmp.dir>${project.build.directory}/testing-tmp</test.tmp.dir>
<hadoop.version>2.6.4</hadoop.version>
- <storage-api.version>2.1.1-pre-orc</storage-api.version>
+ <storage-api.version>2.1.1.2-pre-orc</storage-api.version>
</properties>
<build>
http://git-wip-us.apache.org/repos/asf/orc/blob/d09af8d9/java/storage-api/pom.xml
----------------------------------------------------------------------
diff --git a/java/storage-api/pom.xml b/java/storage-api/pom.xml
index 85f8060..a8543b4 100644
--- a/java/storage-api/pom.xml
+++ b/java/storage-api/pom.xml
@@ -27,7 +27,7 @@
<artifactId>hive-storage-api</artifactId>
<!-- remove our custom version of storage-api once we get the changes
released as hive 2.1.1 -->
- <version>2.1.1-pre-orc</version>
+ <version>2.1.1.2-pre-orc</version>
<packaging>jar</packaging>
<name>Hive Storage API</name>
http://git-wip-us.apache.org/repos/asf/orc/blob/d09af8d9/java/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/EncodedColumnBatch.java
----------------------------------------------------------------------
diff --git a/java/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/EncodedColumnBatch.java b/java/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/EncodedColumnBatch.java
index 907181e..13772c9 100644
--- a/java/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/EncodedColumnBatch.java
+++ b/java/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/EncodedColumnBatch.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.common.io.encoded;
+import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
@@ -76,14 +77,17 @@ public class EncodedColumnBatch<BatchKey> {
/** The key that is used to map this batch to source location. */
protected BatchKey batchKey;
/**
- * Stream data for each stream, for each included column.
- * For each column, streams are indexed by kind, with missing elements being null.
+ * Stream data for each column that has true in the corresponding hasData position.
+ * For each column, streams are indexed by kind (for ORC), with missing elements being null.
*/
protected ColumnStreamData[][] columnData;
- /** Column indexes included in the batch. Correspond to columnData elements. */
- protected int[] columnIxs;
+ /** Indicates which columns have data. Correspond to columnData elements. */
+ protected boolean[] hasData;
public void reset() {
+ if (hasData != null) {
+ Arrays.fill(hasData, false);
+ }
if (columnData == null) return;
for (int i = 0; i < columnData.length; ++i) {
if (columnData[i] == null) continue;
@@ -93,37 +97,37 @@ public class EncodedColumnBatch<BatchKey> {
}
}
- public void initColumn(int colIxMod, int colIx, int streamCount) {
- columnIxs[colIxMod] = colIx;
- if (columnData[colIxMod] == null || columnData[colIxMod].length != streamCount) {
- columnData[colIxMod] = new ColumnStreamData[streamCount];
+ public void initColumn(int colIx, int streamCount) {
+ hasData[colIx] = true;
+ if (columnData[colIx] == null || columnData[colIx].length != streamCount) {
+ columnData[colIx] = new ColumnStreamData[streamCount];
}
}
- public void setStreamData(int colIxMod, int streamKind, ColumnStreamData csd) {
- columnData[colIxMod][streamKind] = csd;
- }
-
- public void setAllStreamsData(int colIxMod, int colIx, ColumnStreamData[] sbs) {
- columnIxs[colIxMod] = colIx;
- columnData[colIxMod] = sbs;
+ public void setStreamData(int colIx, int streamIx, ColumnStreamData csd) {
+ assert hasData[colIx];
+ columnData[colIx][streamIx] = csd;
}
public BatchKey getBatchKey() {
return batchKey;
}
- public ColumnStreamData[][] getColumnData() {
- return columnData;
+ public ColumnStreamData[] getColumnData(int colIx) {
+ if (!hasData[colIx]) throw new AssertionError("No data for column " + colIx);
+ return columnData[colIx];
}
- public int[] getColumnIxs() {
- return columnIxs;
+ public int getTotalColCount() {
+ return columnData.length; // Includes the columns that have no data
}
protected void resetColumnArrays(int columnCount) {
- if (columnIxs != null && columnCount == columnIxs.length) return;
- columnIxs = new int[columnCount];
+ if (hasData != null && columnCount == hasData.length) {
+ Arrays.fill(hasData, false);
+ return;
+ }
+ hasData = new boolean[columnCount];
ColumnStreamData[][] columnData = new ColumnStreamData[columnCount][];
if (this.columnData != null) {
for (int i = 0; i < Math.min(columnData.length, this.columnData.length); ++i) {
@@ -132,4 +136,8 @@ public class EncodedColumnBatch<BatchKey> {
}
this.columnData = columnData;
}
-}
\ No newline at end of file
+
+ public boolean hasData(int colIx) {
+ return hasData[colIx];
+ }
+}
[2/2] orc git commit: HIVE-15124. Fix OrcInputFormat to use reader's schema for include boolean array.
Posted by om...@apache.org.
HIVE-15124. Fix OrcInputFormat to use reader's schema for include
boolean array.
Signed-off-by: Owen O'Malley <om...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/c9a6dc1a
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/c9a6dc1a
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/c9a6dc1a
Branch: refs/heads/master
Commit: c9a6dc1ac4b15a54184203b2151151564f98874b
Parents: d09af8d
Author: Owen O'Malley <om...@apache.org>
Authored: Mon Nov 28 16:48:34 2016 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Mon Nov 28 16:48:34 2016 -0800
----------------------------------------------------------------------
.../java/org/apache/orc/impl/ReaderImpl.java | 12 ------
.../org/apache/orc/impl/RecordReaderImpl.java | 25 ++++++------
.../org/apache/orc/impl/SchemaEvolution.java | 40 +++++++++++++++++++-
.../org/apache/orc/impl/TreeReaderFactory.java | 3 +-
4 files changed, 50 insertions(+), 30 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/c9a6dc1a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index 039365c..ad3f8ba 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -585,18 +585,6 @@ public class ReaderImpl implements Reader {
@Override
public RecordReader rows(Options options) throws IOException {
LOG.info("Reading ORC rows from " + path + " with " + options);
- boolean[] include = options.getInclude();
- // if included columns is null, then include all columns
- if (include == null) {
- options = options.clone();
- TypeDescription readSchema = options.getSchema();
- if (readSchema == null) {
- readSchema = schema;
- }
- include = new boolean[readSchema.getMaximumId() + 1];
- Arrays.fill(include, true);
- options.include(include);
- }
return new RecordReaderImpl(this, options);
}
http://git-wip-us.apache.org/repos/asf/orc/blob/c9a6dc1a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index c7ce2bb..362dd3d 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -77,8 +77,8 @@ public class RecordReaderImpl implements RecordReader {
private final List<OrcProto.Type> types;
private final int bufferSize;
private final SchemaEvolution evolution;
- private final boolean[] included;
- private final boolean[] writerIncluded;
+ // the file included columns indexed by the file's column ids.
+ private final boolean[] fileIncluded;
private final long rowIndexStride;
private long rowInStripe = 0;
private int currentStripe = -1;
@@ -138,9 +138,7 @@ public class RecordReaderImpl implements RecordReader {
protected RecordReaderImpl(ReaderImpl fileReader,
Reader.Options options) throws IOException {
- this.included = options.getInclude();
this.writerVersion = fileReader.getWriterVersion();
- included[0] = true;
if (options.getSchema() == null) {
if (LOG.isInfoEnabled()) {
LOG.info("Reader schema not provided -- using file schema " +
@@ -174,7 +172,7 @@ public class RecordReaderImpl implements RecordReader {
if (sarg != null && rowIndexStride != 0) {
sargApp = new SargApplier(sarg, options.getColumnNames(),
rowIndexStride,
- included.length, evolution,
+ evolution,
writerVersion);
} else {
sargApp = null;
@@ -198,7 +196,7 @@ public class RecordReaderImpl implements RecordReader {
zeroCopy = OrcConf.USE_ZEROCOPY.getBoolean(fileReader.conf);
}
if (options.getDataReader() != null) {
- this.dataReader = options.getDataReader();
+ this.dataReader = options.getDataReader().clone();
} else {
this.dataReader = RecordReaderUtils.createDefaultDataReader(
DataReaderProperties.builder()
@@ -224,7 +222,7 @@ public class RecordReaderImpl implements RecordReader {
.skipCorrupt(skipCorrupt);
reader = TreeReaderFactory.createTreeReader(evolution.getReaderSchema(), readerContext);
- writerIncluded = evolution.getFileIncluded();
+ this.fileIncluded = evolution.getFileIncluded();
indexes = new OrcProto.RowIndex[types.size()];
bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()];
bloomFilterKind = new OrcProto.Stream.Kind[types.size()];
@@ -733,7 +731,6 @@ public class RecordReaderImpl implements RecordReader {
public SargApplier(SearchArgument sarg,
String[] columnNames,
long rowIndexStride,
- int includedCount,
SchemaEvolution evolution,
OrcFile.WriterVersion writerVersion) {
this.writerVersion = writerVersion;
@@ -744,7 +741,7 @@ public class RecordReaderImpl implements RecordReader {
this.rowIndexStride = rowIndexStride;
// included will not be null, row options will fill the array with
// trues if null
- sargColumns = new boolean[includedCount];
+ sargColumns = new boolean[evolution.getFileIncluded().length];
for (int i : filterColumns) {
// filter columns may have -1 as index which could be partition
// column in SARG.
@@ -832,7 +829,7 @@ public class RecordReaderImpl implements RecordReader {
if (sargApp == null) {
return null;
}
- readRowIndex(currentStripe, writerIncluded, sargApp.sargColumns);
+ readRowIndex(currentStripe, fileIncluded, sargApp.sargColumns);
return sargApp.pickRowGroups(stripes.get(currentStripe), indexes,
bloomFilterKind, bloomFilterIndices, false);
}
@@ -890,7 +887,7 @@ public class RecordReaderImpl implements RecordReader {
}
private boolean isFullRead() {
- for (boolean isColumnPresent : writerIncluded){
+ for (boolean isColumnPresent : fileIncluded){
if (!isColumnPresent){
return false;
}
@@ -1005,7 +1002,7 @@ public class RecordReaderImpl implements RecordReader {
private void readPartialDataStreams(StripeInformation stripe) throws IOException {
List<OrcProto.Stream> streamList = stripeFooter.getStreamsList();
DiskRangeList toRead = planReadPartialDataStreams(streamList,
- indexes, writerIncluded, includedRowGroups, codec != null,
+ indexes, fileIncluded, includedRowGroups, codec != null,
stripeFooter.getColumnsList(), types, bufferSize, true);
if (LOG.isDebugEnabled()) {
LOG.debug("chunks = " + RecordReaderUtils.stringifyDiskRanges(toRead));
@@ -1015,7 +1012,7 @@ public class RecordReaderImpl implements RecordReader {
LOG.debug("merge = " + RecordReaderUtils.stringifyDiskRanges(bufferChunks));
}
- createStreams(streamList, bufferChunks, writerIncluded, codec, bufferSize, streams);
+ createStreams(streamList, bufferChunks, fileIncluded, codec, bufferSize, streams);
}
/**
@@ -1227,7 +1224,7 @@ public class RecordReaderImpl implements RecordReader {
currentStripe = rightStripe;
readStripe();
}
- readRowIndex(currentStripe, writerIncluded, sargApp == null ? null : sargApp.sargColumns);
+ readRowIndex(currentStripe, fileIncluded, sargApp == null ? null : sargApp.sargColumns);
// if we aren't to the right row yet, advance in the stripe.
advanceToNextRow(reader, rowNumber, true);
http://git-wip-us.apache.org/repos/asf/orc/blob/c9a6dc1a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index 20adfd8..b76a319 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -39,6 +39,8 @@ public class SchemaEvolution {
private final TypeDescription[] readerFileTypes;
// indexed by reader column id
private final boolean[] readerIncluded;
+ // the offset to the first column id ignoring any ACID columns
+ private final int readerColumnOffset;
// indexed by file column id
private final boolean[] fileIncluded;
private final TypeDescription fileSchema;
@@ -76,12 +78,20 @@ public class SchemaEvolution {
this.hasConversion = false;
this.fileSchema = fileSchema;
isAcid = checkAcidSchema(fileSchema);
+ this.readerColumnOffset = isAcid ? acidEventFieldNames.size() : 0;
if (readerSchema != null) {
if (isAcid) {
this.readerSchema = createEventSchema(readerSchema);
} else {
this.readerSchema = readerSchema;
}
+ if (readerIncluded != null &&
+ readerIncluded.length + readerColumnOffset !=
+ this.readerSchema.getMaximumId() + 1) {
+ throw new IllegalArgumentException("Include vector the wrong length: "
+ + this.readerSchema.toJson() + " with include length "
+ + readerIncluded.length);
+ }
this.readerFileTypes =
new TypeDescription[this.readerSchema.getMaximumId() + 1];
int positionalLevels = 0;
@@ -106,6 +116,13 @@ public class SchemaEvolution {
this.readerSchema = fileSchema;
this.readerFileTypes =
new TypeDescription[this.readerSchema.getMaximumId() + 1];
+ if (readerIncluded != null &&
+ readerIncluded.length + readerColumnOffset !=
+ this.readerSchema.getMaximumId() + 1) {
+ throw new IllegalArgumentException("Include vector the wrong length: "
+ + this.readerSchema.toJson() + " with include length "
+ + readerIncluded.length);
+ }
buildIdentityConversion(this.readerSchema);
}
this.ppdSafeConversion = populatePpdSafeConversion();
@@ -170,10 +187,18 @@ public class SchemaEvolution {
return readerFileTypes[id];
}
+ /**
+ * Get whether each column is included from the reader's point of view.
+ * @return a boolean array indexed by reader column id
+ */
public boolean[] getReaderIncluded() {
return readerIncluded;
}
+ /**
+ * Get whether each column is included from the file's point of view.
+ * @return a boolean array indexed by file column id
+ */
public boolean[] getFileIncluded() {
return fileIncluded;
}
@@ -273,6 +298,17 @@ public class SchemaEvolution {
}
/**
+ * Should we read the given reader column?
+ * @param readerId the id of column in the extended reader schema
+ * @return true if the column should be read
+ */
+ public boolean includeReaderColumn(int readerId) {
+ return readerIncluded == null ||
+ readerId <= readerColumnOffset ||
+ readerIncluded[readerId - readerColumnOffset];
+ }
+
+ /**
* Build the mapping from the file type to the reader type. For pre-HIVE-4243
* ORC files, the top level structure is matched using position within the
* row. Otherwise, structs fields are matched by name.
@@ -287,7 +323,7 @@ public class SchemaEvolution {
TypeDescription readerType,
int positionalLevels) {
// if the column isn't included, don't map it
- if (readerIncluded != null && !readerIncluded[readerType.getId()]) {
+ if (!includeReaderColumn(readerType.getId())) {
return;
}
boolean isOk = true;
@@ -400,7 +436,7 @@ public class SchemaEvolution {
void buildIdentityConversion(TypeDescription readerType) {
int id = readerType.getId();
- if (readerIncluded != null && !readerIncluded[id]) {
+ if (!includeReaderColumn(id)) {
return;
}
if (readerFileTypes[id] != null) {
http://git-wip-us.apache.org/repos/asf/orc/blob/c9a6dc1a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 73fe28f..be8d6b2 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -2132,8 +2132,7 @@ public class TreeReaderFactory {
final SchemaEvolution evolution = context.getSchemaEvolution();
final boolean[] included = evolution.getReaderIncluded();
TypeDescription fileType = evolution.getFileType(readerType);
- if (fileType == null ||
- (included != null && !included[readerType.getId()])) {
+ if (fileType == null || !evolution.includeReaderColumn(readerType.getId())){
return new NullTreeReader(0);
}
TypeDescription.Category readerTypeCategory = readerType.getCategory();