You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by go...@apache.org on 2019/08/09 00:28:41 UTC

[orc] branch master updated: ORC-540: PPD: Positional lookups for columns with Options.forcePositionalEvolution(true)

This is an automated email from the ASF dual-hosted git repository.

gopalv pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/master by this push:
     new a9c0ca4  ORC-540: PPD: Positional lookups for columns with Options.forcePositionalEvolution(true)
a9c0ca4 is described below

commit a9c0ca43cffab3acd73d19ac91297e1f507277c2
Author: Laszlo Bodor <bo...@gmail.com>
AuthorDate: Thu Aug 8 16:12:22 2019 -0700

    ORC-540: PPD: Positional lookups for columns with Options.forcePositionalEvolution(true)
    
    Fixes #418
    
    Signed-off-by: Gopal V <go...@apache.org>
---
 .../java/org/apache/orc/impl/RecordReaderImpl.java | 11 +++++-
 .../java/org/apache/orc/impl/SchemaEvolution.java  | 15 ++++++--
 .../org/apache/orc/impl/TestRecordReaderImpl.java  | 41 ++++++++++++++++++++--
 3 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index d6ae7d5..de59579 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -97,8 +97,17 @@ public class RecordReaderImpl implements RecordReader {
   static int findColumns(SchemaEvolution evolution,
                          String columnName) {
     try {
-      return evolution.getFileSchema().findSubtype(columnName).getId();
+      final TypeDescription targetSchema;
+      if (evolution.getPositionalColumns()) {
+        targetSchema = evolution.getReaderBaseSchema();
+      } else {
+        targetSchema = evolution.getFileSchema();
+      }
+      return targetSchema.findSubtype(columnName).getId();
     } catch (IllegalArgumentException e) {
+      if (LOG.isDebugEnabled()){
+        LOG.debug("{}", e.getMessage());
+      }
       return -1;
     }
   }
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index d93c9bd..4fdc934 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -59,6 +59,9 @@ public class SchemaEvolution {
   // indexed by reader column id
   private final boolean[] ppdSafeConversion;
 
+  // whether reader and file columns are matched by position rather than by name
+  private final boolean positionalColumns;
+
   private static final Logger LOG =
     LoggerFactory.getLogger(SchemaEvolution.class);
   private static final Pattern missingMetadataPattern =
@@ -82,8 +85,8 @@ public class SchemaEvolution {
     this.hasConversion = false;
     this.isOnlyImplicitConversion = true;
     this.fileSchema = fileSchema;
-    isAcid = checkAcidSchema(fileSchema);
-    includeAcidColumns = options.getIncludeAcidColumns();
+    this.isAcid = checkAcidSchema(fileSchema);
+    this.includeAcidColumns = options.getIncludeAcidColumns();
     this.readerColumnOffset = isAcid ? acidEventFieldNames.size() : 0;
     if (readerSchema != null) {
       if (isAcid) {
@@ -134,6 +137,7 @@ public class SchemaEvolution {
       }
       buildIdentityConversion(this.readerSchema);
     }
+    this.positionalColumns = options.getForcePositionalEvolution();
     this.ppdSafeConversion = populatePpdSafeConversion();
   }
 
@@ -238,6 +242,13 @@ public class SchemaEvolution {
   }
 
   /**
+   * Returns true if columns are matched by position, false if matched by name.
+   */
+  public boolean getPositionalColumns() {
+    return this.positionalColumns;
+  }
+
+  /**
    * Determine if there is implicit conversion from a file to reader type.
    *
    * Implicit conversions are:
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index 0763363..254ffcc 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -68,6 +68,7 @@ import org.apache.orc.impl.writer.StreamOptions;
 import org.apache.orc.util.BloomFilter;
 import org.apache.orc.DataReader;
 import org.apache.orc.RecordReader;
+import org.apache.orc.TestVectorOrcFile;
 import org.apache.orc.TypeDescription;
 import org.apache.orc.Writer;
 import org.apache.orc.impl.RecordReaderImpl.Location;
@@ -84,8 +85,6 @@ import org.apache.orc.OrcProto;
 import org.apache.orc.util.BloomFilterIO;
 import org.apache.orc.util.BloomFilterUtf8;
 import org.junit.Test;
-import org.mockito.MockSettings;
-import org.mockito.Mockito;
 
 public class TestRecordReaderImpl {
 
@@ -102,6 +101,44 @@ public class TestRecordReaderImpl {
     assertEquals(3, RecordReaderImpl.findColumns(evo, "e"));
   }
 
+  @Test
+  public void testForcePositionalEvolution() throws Exception {
+    Configuration conf = new Configuration();
+
+    Path oldFilePath = new Path(TestVectorOrcFile.getFileFromClasspath("orc-file-11-format.orc"));
+    Reader reader = OrcFile.createReader(oldFilePath,
+        OrcFile.readerOptions(conf).filesystem(FileSystem.getLocal(conf)));
+
+    TypeDescription fileSchema =
+        TypeDescription.fromString("struct<col0:boolean,col1:tinyint,col2:smallint,"
+            + "col3:int,col4:bigint,col5:float,col6:double,col7:"
+            + "binary,col8:string,col9:struct<list:array<struct<int1:int,"
+            + "string1:string>>>,col10:array<struct<int1:int,string1:string>>,"
+            + "col11:map<string,struct<int1:int,string1:string>>,col12:timestamp,"
+            + "col13:decimal(38,10)>");
+
+    SchemaEvolution evo = new SchemaEvolution(fileSchema, reader.getSchema(),
+        new Reader.Options(conf).forcePositionalEvolution(true));
+    assertEquals(4, RecordReaderImpl.findColumns(evo, "int1"));
+
+    evo = new SchemaEvolution(fileSchema, reader.getSchema(),
+        new Reader.Options(conf).forcePositionalEvolution(false));
+    assertEquals(-1, RecordReaderImpl.findColumns(evo, "int1"));
+
+    TypeDescription acidSchema = SchemaEvolution.createEventSchema(fileSchema);
+
+    SchemaEvolution evoAcid =
+        new SchemaEvolution(acidSchema, reader.getSchema(),
+            new Reader.Options(conf).forcePositionalEvolution(true));
+    // offset by 6: the 5 ACID metadata columns plus the enclosing row struct
+    assertEquals(6+4, RecordReaderImpl.findColumns(evoAcid, "int1"));
+
+    evoAcid =
+        new SchemaEvolution(acidSchema, reader.getSchema(),
+            new Reader.Options(conf).forcePositionalEvolution(false));
+    assertEquals(-1, RecordReaderImpl.findColumns(evoAcid, "int1"));
+  }
+
   /**
    * Create a predicate leaf. This is used by another test.
    */