You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2020/08/20 23:04:32 UTC

[orc] branch master updated: ORC-626: Reading Struct Column Having Multiple Fields With Same Name Causes java.io.EOFException

This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/master by this push:
     new 4688240  ORC-626: Reading Struct Column Having Multiple Fields With Same Name Causes java.io.EOFException
4688240 is described below

commit 468824044538aed45702e4b822cf5a2ade361e30
Author: Syed Shameerur Rahman <sr...@qubole.com>
AuthorDate: Sun May 3 13:45:05 2020 +0530

    ORC-626: Reading Struct Column Having Multiple Fields With Same Name Causes java.io.EOFException
    
    Fixes #505
    
    Signed-off-by: Owen O'Malley <om...@apache.org>
---
 java/core/src/java/org/apache/orc/OrcConf.java         |  4 ++++
 java/core/src/java/org/apache/orc/Reader.java          | 18 ++++++++++++++++++
 .../src/java/org/apache/orc/impl/SchemaEvolution.java  |  4 ++--
 .../test/org/apache/orc/impl/TestSchemaEvolution.java  | 13 +++++++++++++
 4 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java
index bf83431..310364e 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -147,6 +147,10 @@ public enum OrcConf {
       "Require schema evolution to match the top level columns using position\n" +
       "rather than column names. This provides backwards compatibility with\n" +
       "Hive 2.1."),
+  FORCE_POSITIONAL_EVOLUTION_LEVEL("orc.force.positional.evolution.level",
+      "orc.force.positional.evolution.level", 1,
+      "Require schema evolution to match the defined no. of level columns using position\n" +
+          "rather than column names. This provides backwards compatibility with Hive 2.1."),
   ROWS_BETWEEN_CHECKS("orc.rows.between.memory.checks", "orc.rows.between.memory.checks", 5000,
     "How often should MemoryManager check the memory sizes? Measured in rows\n" +
       "added to all of the writers.  Valid range is [1,10000] and is primarily meant for" +
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 0aae622..4aa1cff 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -184,6 +184,7 @@ public interface Reader extends Closeable {
     private boolean[] include;
     private long offset = 0;
     private long length = Long.MAX_VALUE;
+    private int positionalEvolutionLevel;
     private SearchArgument sarg = null;
     private String[] columnNames = null;
     private Boolean useZeroCopy = null;
@@ -207,6 +208,7 @@ public interface Reader extends Closeable {
       skipCorruptRecords = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
       tolerateMissingSchema = OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf);
       forcePositionalEvolution = OrcConf.FORCE_POSITIONAL_EVOLUTION.getBoolean(conf);
+      positionalEvolutionLevel = OrcConf.FORCE_POSITIONAL_EVOLUTION_LEVEL.getInt(conf);
       isSchemaEvolutionCaseAware =
           OrcConf.IS_SCHEMA_EVOLUTION_CASE_SENSITIVE.getBoolean(conf);
     }
@@ -330,6 +332,18 @@ public interface Reader extends Closeable {
     }
 
     /**
+     * Set the number of levels for which schema evolution is forced to be
+     * positional instead of based on the column names.
+     * @param value the number of levels to match by position
+     * @return this
+     */
+    public Options positionalEvolutionLevel(int value) {
+      this.positionalEvolutionLevel = value;
+      return this;
+    }
+
+
+    /**
      * Set boolean flag to determine if the comparision of field names in schema
      * evolution is case sensitive
      * @param value the flag for schema evolution is case sensitive or not.
@@ -404,6 +418,10 @@ public interface Reader extends Closeable {
       return forcePositionalEvolution;
     }
 
+    public int getPositionalEvolutionLevel() {
+      return positionalEvolutionLevel;
+    }
+
     public boolean getIsSchemaEvolutionCaseAware() {
       return isSchemaEvolutionCaseAware;
     }
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index 1e86f26..dac5437 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -106,7 +106,7 @@ public class SchemaEvolution {
         new TypeDescription[this.readerSchema.getMaximumId() + 1];
       int positionalLevels = 0;
       if (options.getForcePositionalEvolution()) {
-        positionalLevels = isAcid ? 2 : 1;
+        positionalLevels = isAcid ? 2 : options.getPositionalEvolutionLevel();
       } else if (!hasColumnNames(isAcid? getBaseRow(fileSchema) : fileSchema)) {
         if (!this.fileSchema.equals(this.readerSchema)) {
           if (!allowMissingMetadata) {
@@ -119,7 +119,7 @@ public class SchemaEvolution {
                 + " caused by a writer earlier than HIVE-4243. The reader will"
                 + " reconcile schemas based on index. File type: " +
                 this.fileSchema + ", reader type: " + this.readerSchema);
-            positionalLevels = isAcid ? 2 : 1;
+            positionalLevels = isAcid ? 2 : options.getPositionalEvolutionLevel();
           }
         }
       }
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index be7b616..f5fe82f 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -1723,6 +1723,19 @@ public class TestSchemaEvolution {
     assertEquals(null, evo.getFileType(4));
   }
 
+  @Test
+  public void testPositionalEvolutionLevel() throws IOException {
+    options.forcePositionalEvolution(true);
+    options.positionalEvolutionLevel(2);
+    TypeDescription file = TypeDescription.fromString("struct<a:int,b:struct<y:int,y:int>>");
+    TypeDescription read = TypeDescription.fromString("struct<a:int,b:struct<y:int,y:int>>");
+    SchemaEvolution evo = new SchemaEvolution(file, read, options);
+    assertEquals(1, evo.getFileType(1).getId());
+    assertEquals(2, evo.getFileType(2).getId());
+    assertEquals(3, evo.getFileType(3).getId());
+    assertEquals(4, evo.getFileType(4).getId());
+  }
+
   // These are helper methods that pull some of the common code into one
   // place.