You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2020/08/21 18:06:09 UTC

[orc] 01/03: ORC-626: Reading Struct Column Having Multiple Fields With Same Name Causes java.io.EOFException

This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git

commit c9cf49e6f8b08830c123a03c9984ae9272ddc3b8
Author: Syed Shameerur Rahman <sr...@qubole.com>
AuthorDate: Sun May 3 13:45:05 2020 +0530

    ORC-626: Reading Struct Column Having Multiple Fields With Same Name Causes java.io.EOFException
    
    Fixes #505
    
    Signed-off-by: Owen O'Malley <om...@apache.org>
---
 java/core/src/java/org/apache/orc/OrcConf.java         |  4 ++++
 java/core/src/java/org/apache/orc/Reader.java          | 18 ++++++++++++++++++
 .../src/java/org/apache/orc/impl/SchemaEvolution.java  |  5 ++---
 .../test/org/apache/orc/impl/TestSchemaEvolution.java  | 13 +++++++++++++
 4 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java
index 6586937..76d480b 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -147,6 +147,10 @@ public enum OrcConf {
       "Require schema evolution to match the top level columns using position\n" +
       "rather than column names. This provides backwards compatibility with\n" +
       "Hive 2.1."),
+  FORCE_POSITIONAL_EVOLUTION_LEVEL("orc.force.positional.evolution.level",
+      "orc.force.positional.evolution.level", 1,
+      "Require schema evolution to match the the defined no. of level columns using position\n" +
+          "rather than column names. This provides backwards compatibility with Hive 2.1."),
   ROWS_BETWEEN_CHECKS("orc.rows.between.memory.checks", "orc.rows.between.memory.checks", 5000,
     "How often should MemoryManager check the memory sizes? Measured in rows\n" +
       "added to all of the writers.  Valid range is [1,10000] and is primarily meant for" +
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 2c61234..2117b45 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -153,6 +153,7 @@ public interface Reader extends Closeable {
     private boolean[] include;
     private long offset = 0;
     private long length = Long.MAX_VALUE;
+    private int positionalEvolutionLevel;
     private SearchArgument sarg = null;
     private String[] columnNames = null;
     private Boolean useZeroCopy = null;
@@ -174,6 +175,7 @@ public interface Reader extends Closeable {
       skipCorruptRecords = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
       tolerateMissingSchema = OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf);
       forcePositionalEvolution = OrcConf.FORCE_POSITIONAL_EVOLUTION.getBoolean(conf);
+      positionalEvolutionLevel = OrcConf.FORCE_POSITIONAL_EVOLUTION_LEVEL.getInt(conf);
       isSchemaEvolutionCaseAware =
           OrcConf.IS_SCHEMA_EVOLUTION_CASE_SENSITIVE.getBoolean(conf);
     }
@@ -269,6 +271,18 @@ public interface Reader extends Closeable {
     }
 
     /**
+     * Set no. of levels to force schema evolution to be positional instead of
+     * based on the column names.
+     * @param value force positional evolution
+     * @return this
+     */
+    public Options positionalEvolutionLevel(int value) {
+      this.positionalEvolutionLevel = value;
+      return this;
+    }
+
+
+    /**
      * Set boolean flag to determine if the comparision of field names in schema
      * evolution is case sensitive
      * @param value the flag for schema evolution is case sensitive or not.
@@ -334,6 +348,10 @@ public interface Reader extends Closeable {
       return forcePositionalEvolution;
     }
 
+    public int getPositionalEvolutionLevel() {
+      return positionalEvolutionLevel;
+    }
+
     public boolean getIsSchemaEvolutionCaseAware() {
       return isSchemaEvolutionCaseAware;
     }
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index e570035..b70c0e4 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -103,8 +103,7 @@ public class SchemaEvolution {
         new TypeDescription[this.readerSchema.getMaximumId() + 1];
       int positionalLevels = 0;
       if (options.getForcePositionalEvolution()) {
-        positionalLevels = isAcid ? 2 : 1;
-        buildConversion(fileSchema, this.readerSchema, positionalLevels);
+        positionalLevels = isAcid ? 2 : options.getPositionalEvolutionLevel();
       } else if (!hasColumnNames(isAcid? getBaseRow(fileSchema) : fileSchema)) {
         if (!this.fileSchema.equals(this.readerSchema)) {
           if (!allowMissingMetadata) {
@@ -117,7 +116,7 @@ public class SchemaEvolution {
                 + " caused by a writer earlier than HIVE-4243. The reader will"
                 + " reconcile schemas based on index. File type: " +
                 this.fileSchema + ", reader type: " + this.readerSchema);
-            positionalLevels = isAcid ? 2 : 1;
+            positionalLevels = isAcid ? 2 : options.getPositionalEvolutionLevel();
           }
         }
       }
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index 2dd0633..789bf55 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -1703,6 +1703,19 @@ public class TestSchemaEvolution {
     assertEquals(null, evo.getFileType(4));
   }
 
+  @Test
+  public void testPositionalEvolutionLevel() throws IOException {
+    options.forcePositionalEvolution(true);
+    options.positionalEvolutionLevel(2);
+    TypeDescription file = TypeDescription.fromString("struct<a:int,b:struct<y:int,y:int>>");
+    TypeDescription read = TypeDescription.fromString("struct<a:int,b:struct<y:int,y:int>>");
+    SchemaEvolution evo = new SchemaEvolution(file, read, options);
+    assertEquals(1, evo.getFileType(1).getId());
+    assertEquals(2, evo.getFileType(2).getId());
+    assertEquals(3, evo.getFileType(3).getId());
+    assertEquals(4, evo.getFileType(4).getId());
+  }
+
   // These are helper methods that pull some of the common code into one
   // place.