You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by go...@apache.org on 2019/08/09 00:28:41 UTC
[orc] branch master updated: ORC-540: PPD: Positional lookups for columns with Options.forcePositionalEvolution(true)
This is an automated email from the ASF dual-hosted git repository.
gopalv pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/master by this push:
new a9c0ca4 ORC-540: PPD: Positional lookups for columns with Options.forcePositionalEvolution(true)
a9c0ca4 is described below
commit a9c0ca43cffab3acd73d19ac91297e1f507277c2
Author: Laszlo Bodor <bo...@gmail.com>
AuthorDate: Thu Aug 8 16:12:22 2019 -0700
ORC-540: PPD: Positional lookups for columns with Options.forcePositionalEvolution(true)
Fixes #418
Signed-off-by: Gopal V <go...@apache.org>
---
.../java/org/apache/orc/impl/RecordReaderImpl.java | 11 +++++-
.../java/org/apache/orc/impl/SchemaEvolution.java | 15 ++++++--
.../org/apache/orc/impl/TestRecordReaderImpl.java | 41 ++++++++++++++++++++--
3 files changed, 62 insertions(+), 5 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index d6ae7d5..de59579 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -97,8 +97,17 @@ public class RecordReaderImpl implements RecordReader {
static int findColumns(SchemaEvolution evolution,
String columnName) {
try {
- return evolution.getFileSchema().findSubtype(columnName).getId();
+ final TypeDescription targetSchema;
+ if (evolution.getPositionalColumns()) {
+ targetSchema = evolution.getReaderBaseSchema();
+ } else {
+ targetSchema = evolution.getFileSchema();
+ }
+ return targetSchema.findSubtype(columnName).getId();
} catch (IllegalArgumentException e) {
+ if (LOG.isDebugEnabled()){
+ LOG.debug("{}", e.getMessage());
+ }
return -1;
}
}
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index d93c9bd..4fdc934 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -59,6 +59,9 @@ public class SchemaEvolution {
// indexed by reader column id
private final boolean[] ppdSafeConversion;
+ // columns are indexed, not named between Reader & File schema
+ private final boolean positionalColumns;
+
private static final Logger LOG =
LoggerFactory.getLogger(SchemaEvolution.class);
private static final Pattern missingMetadataPattern =
@@ -82,8 +85,8 @@ public class SchemaEvolution {
this.hasConversion = false;
this.isOnlyImplicitConversion = true;
this.fileSchema = fileSchema;
- isAcid = checkAcidSchema(fileSchema);
- includeAcidColumns = options.getIncludeAcidColumns();
+ this.isAcid = checkAcidSchema(fileSchema);
+ this.includeAcidColumns = options.getIncludeAcidColumns();
this.readerColumnOffset = isAcid ? acidEventFieldNames.size() : 0;
if (readerSchema != null) {
if (isAcid) {
@@ -134,6 +137,7 @@ public class SchemaEvolution {
}
buildIdentityConversion(this.readerSchema);
}
+ this.positionalColumns = options.getForcePositionalEvolution();
this.ppdSafeConversion = populatePpdSafeConversion();
}
@@ -238,6 +242,13 @@ public class SchemaEvolution {
}
/**
+ * Get whether the columns are handled via position or name
+ */
+ public boolean getPositionalColumns() {
+ return this.positionalColumns;
+ }
+
+ /**
* Determine if there is implicit conversion from a file to reader type.
*
* Implicit conversions are:
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index 0763363..254ffcc 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -68,6 +68,7 @@ import org.apache.orc.impl.writer.StreamOptions;
import org.apache.orc.util.BloomFilter;
import org.apache.orc.DataReader;
import org.apache.orc.RecordReader;
+import org.apache.orc.TestVectorOrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
import org.apache.orc.impl.RecordReaderImpl.Location;
@@ -84,8 +85,6 @@ import org.apache.orc.OrcProto;
import org.apache.orc.util.BloomFilterIO;
import org.apache.orc.util.BloomFilterUtf8;
import org.junit.Test;
-import org.mockito.MockSettings;
-import org.mockito.Mockito;
public class TestRecordReaderImpl {
@@ -102,6 +101,44 @@ public class TestRecordReaderImpl {
assertEquals(3, RecordReaderImpl.findColumns(evo, "e"));
}
+ @Test
+ public void testForcePositionalEvolution() throws Exception {
+ Configuration conf = new Configuration();
+
+ Path oldFilePath = new Path(TestVectorOrcFile.getFileFromClasspath("orc-file-11-format.orc"));
+ Reader reader = OrcFile.createReader(oldFilePath,
+ OrcFile.readerOptions(conf).filesystem(FileSystem.getLocal(conf)));
+
+ TypeDescription fileSchema =
+ TypeDescription.fromString("struct<col0:boolean,col1:tinyint,col2:smallint,"
+ + "col3:int,col4:bigint,col5:float,col6:double,col7:"
+ + "binary,col8:string,col9:struct<list:array<struct<int1:int,"
+ + "string1:string>>>,col10:array<struct<int1:int,string1:string>>,"
+ + "col11:map<string,struct<int1:int,string1:string>>,col12:timestamp,"
+ + "col13:decimal(38,10)>");
+
+ SchemaEvolution evo = new SchemaEvolution(fileSchema, reader.getSchema(),
+ new Reader.Options(conf).forcePositionalEvolution(true));
+ assertEquals(4, RecordReaderImpl.findColumns(evo, "int1"));
+
+ evo = new SchemaEvolution(fileSchema, reader.getSchema(),
+ new Reader.Options(conf).forcePositionalEvolution(false));
+ assertEquals(-1, RecordReaderImpl.findColumns(evo, "int1"));
+
+ TypeDescription acidSchema = SchemaEvolution.createEventSchema(fileSchema);
+
+ SchemaEvolution evoAcid =
+ new SchemaEvolution(acidSchema, reader.getSchema(),
+ new Reader.Options(conf).forcePositionalEvolution(true));
+ // ahead by 6 for 1 struct + 5 for row-id
+ assertEquals(6+4, RecordReaderImpl.findColumns(evoAcid, "int1"));
+
+ evoAcid =
+ new SchemaEvolution(acidSchema, reader.getSchema(),
+ new Reader.Options(conf).forcePositionalEvolution(false));
+ assertEquals(-1, RecordReaderImpl.findColumns(evoAcid, "int1"));
+ }
+
/**
* Create a predicate leaf. This is used by another test.
*/