You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2020/08/21 18:06:08 UTC

[orc] branch branch-1.5 updated (117c8c5 -> 78a3c18)

This is an automated email from the ASF dual-hosted git repository.

omalley pushed a change to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git.


    from 117c8c5  ORC-659: Initialize "next_in" before calling DeflateInit2 (#514)
     new c9cf49e  ORC-626: Reading Struct Column Having Multiple Fields With Same Name Causes java.io.EOFException
     new bad383e  ORC-644: Support positional mapping for nested types.
     new 78a3c18  Fix Julian/Gregorian cutoff in a comment

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 java/core/src/java/org/apache/orc/OrcConf.java     |  4 ++
 java/core/src/java/org/apache/orc/Reader.java      | 18 +++++++
 .../java/org/apache/orc/impl/SchemaEvolution.java  |  7 ++-
 .../org/apache/orc/impl/TestSchemaEvolution.java   | 56 ++++++++++++++++++++++
 proto/orc_proto.proto                              |  3 +-
 5 files changed, 82 insertions(+), 6 deletions(-)


[orc] 01/03: ORC-626: Reading Struct Column Having Multiple Fields With Same Name Causes java.io.EOFException

Posted by om...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git

commit c9cf49e6f8b08830c123a03c9984ae9272ddc3b8
Author: Syed Shameerur Rahman <sr...@qubole.com>
AuthorDate: Sun May 3 13:45:05 2020 +0530

    ORC-626: Reading Struct Column Having Multiple Fields With Same Name Causes java.io.EOFException
    
    Fixes #505
    
    Signed-off-by: Owen O'Malley <om...@apache.org>
---
 java/core/src/java/org/apache/orc/OrcConf.java         |  4 ++++
 java/core/src/java/org/apache/orc/Reader.java          | 18 ++++++++++++++++++
 .../src/java/org/apache/orc/impl/SchemaEvolution.java  |  5 ++---
 .../test/org/apache/orc/impl/TestSchemaEvolution.java  | 13 +++++++++++++
 4 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java
index 6586937..76d480b 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -147,6 +147,10 @@ public enum OrcConf {
       "Require schema evolution to match the top level columns using position\n" +
       "rather than column names. This provides backwards compatibility with\n" +
       "Hive 2.1."),
+  FORCE_POSITIONAL_EVOLUTION_LEVEL("orc.force.positional.evolution.level",
+      "orc.force.positional.evolution.level", 1,
+      "Require schema evolution to match the the defined no. of level columns using position\n" +
+          "rather than column names. This provides backwards compatibility with Hive 2.1."),
   ROWS_BETWEEN_CHECKS("orc.rows.between.memory.checks", "orc.rows.between.memory.checks", 5000,
     "How often should MemoryManager check the memory sizes? Measured in rows\n" +
       "added to all of the writers.  Valid range is [1,10000] and is primarily meant for" +
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 2c61234..2117b45 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -153,6 +153,7 @@ public interface Reader extends Closeable {
     private boolean[] include;
     private long offset = 0;
     private long length = Long.MAX_VALUE;
+    private int positionalEvolutionLevel;
     private SearchArgument sarg = null;
     private String[] columnNames = null;
     private Boolean useZeroCopy = null;
@@ -174,6 +175,7 @@ public interface Reader extends Closeable {
       skipCorruptRecords = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
       tolerateMissingSchema = OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf);
       forcePositionalEvolution = OrcConf.FORCE_POSITIONAL_EVOLUTION.getBoolean(conf);
+      positionalEvolutionLevel = OrcConf.FORCE_POSITIONAL_EVOLUTION_LEVEL.getInt(conf);
       isSchemaEvolutionCaseAware =
           OrcConf.IS_SCHEMA_EVOLUTION_CASE_SENSITIVE.getBoolean(conf);
     }
@@ -269,6 +271,18 @@ public interface Reader extends Closeable {
     }
 
     /**
+     * Set no. of levels to force schema evolution to be positional instead of
+     * based on the column names.
+     * @param value force positional evolution
+     * @return this
+     */
+    public Options positionalEvolutionLevel(int value) {
+      this.positionalEvolutionLevel = value;
+      return this;
+    }
+
+
+    /**
      * Set boolean flag to determine if the comparision of field names in schema
      * evolution is case sensitive
      * @param value the flag for schema evolution is case sensitive or not.
@@ -334,6 +348,10 @@ public interface Reader extends Closeable {
       return forcePositionalEvolution;
     }
 
+    public int getPositionalEvolutionLevel() {
+      return positionalEvolutionLevel;
+    }
+
     public boolean getIsSchemaEvolutionCaseAware() {
       return isSchemaEvolutionCaseAware;
     }
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index e570035..b70c0e4 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -103,8 +103,7 @@ public class SchemaEvolution {
         new TypeDescription[this.readerSchema.getMaximumId() + 1];
       int positionalLevels = 0;
       if (options.getForcePositionalEvolution()) {
-        positionalLevels = isAcid ? 2 : 1;
-        buildConversion(fileSchema, this.readerSchema, positionalLevels);
+        positionalLevels = isAcid ? 2 : options.getPositionalEvolutionLevel();
       } else if (!hasColumnNames(isAcid? getBaseRow(fileSchema) : fileSchema)) {
         if (!this.fileSchema.equals(this.readerSchema)) {
           if (!allowMissingMetadata) {
@@ -117,7 +116,7 @@ public class SchemaEvolution {
                 + " caused by a writer earlier than HIVE-4243. The reader will"
                 + " reconcile schemas based on index. File type: " +
                 this.fileSchema + ", reader type: " + this.readerSchema);
-            positionalLevels = isAcid ? 2 : 1;
+            positionalLevels = isAcid ? 2 : options.getPositionalEvolutionLevel();
           }
         }
       }
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index 2dd0633..789bf55 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -1703,6 +1703,19 @@ public class TestSchemaEvolution {
     assertEquals(null, evo.getFileType(4));
   }
 
+  @Test
+  public void testPositionalEvolutionLevel() throws IOException {
+    options.forcePositionalEvolution(true);
+    options.positionalEvolutionLevel(2);
+    TypeDescription file = TypeDescription.fromString("struct<a:int,b:struct<y:int,y:int>>");
+    TypeDescription read = TypeDescription.fromString("struct<a:int,b:struct<y:int,y:int>>");
+    SchemaEvolution evo = new SchemaEvolution(file, read, options);
+    assertEquals(1, evo.getFileType(1).getId());
+    assertEquals(2, evo.getFileType(2).getId());
+    assertEquals(3, evo.getFileType(3).getId());
+    assertEquals(4, evo.getFileType(4).getId());
+  }
+
   // These are helper methods that pull some of the common code into one
   // place.
 


[orc] 03/03: Fix Julian/Gregorian cutoff in a comment

Posted by om...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git

commit 78a3c18c7c6a00b24ef6229f3e35eaefeb0e0bda
Author: Piotr Findeisen <pi...@gmail.com>
AuthorDate: Thu Aug 13 22:21:34 2020 +0200

    Fix Julian/Gregorian cutoff in a comment
    
    Fixes #538
    
    Signed-off-by: Owen O'Malley <om...@apache.org>
---
 proto/orc_proto.proto | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto
index 24a62a4..e53c1c4 100644
--- a/proto/orc_proto.proto
+++ b/proto/orc_proto.proto
@@ -196,8 +196,7 @@ message Metadata {
 
 enum CalendarKind {
   UNKNOWN_CALENDAR = 0;
-   // The Java default calendar changes from Julian to Gregorian
-   // in 1583.
+   // A hybrid Julian/Gregorian calendar with a cutover point in October 1582.
   JULIAN_GREGORIAN = 1;
   // A calendar that extends the Gregorian calendar back forever.
   PROLEPTIC_GREGORIAN = 2;


[orc] 02/03: ORC-644: Support positional mapping for nested types.

Posted by om...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git

commit bad383e9074b1597af50e85e77e7b6ce79147e39
Author: Arvin Zheng <zm...@gmail.com>
AuthorDate: Mon Jul 27 17:32:04 2020 -0700

    ORC-644: Support positional mapping for nested types.
    
    Resolves #522
    
    Signed-off-by: Owen O'Malley <om...@apache.org>
---
 java/core/src/java/org/apache/orc/Reader.java      |  4 +-
 .../java/org/apache/orc/impl/SchemaEvolution.java  |  2 +-
 .../org/apache/orc/impl/TestSchemaEvolution.java   | 43 ++++++++++++++++++++++
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 2117b45..8403e99 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -271,9 +271,9 @@ public interface Reader extends Closeable {
     }
 
     /**
-     * Set no. of levels to force schema evolution to be positional instead of
+     * Set number of levels to force schema evolution to be positional instead of
      * based on the column names.
-     * @param value force positional evolution
+     * @param value number of levels of positional schema evolution
      * @return this
      */
     public Options positionalEvolutionLevel(int value) {
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index b70c0e4..3184d5e 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -485,7 +485,7 @@ public class SchemaEvolution {
           if (fileChildren.size() == readerChildren.size()) {
             for(int i=0; i < fileChildren.size(); ++i) {
               buildConversion(fileChildren.get(i),
-                              readerChildren.get(i), 0);
+                              readerChildren.get(i), positionalLevels - 1);
             }
           } else {
             isOk = false;
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index 789bf55..f707ce4 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -1716,6 +1716,49 @@ public class TestSchemaEvolution {
     assertEquals(4, evo.getFileType(4).getId());
   }
 
+  @Test
+  public void testPositionalEvolutionForStructInArray() throws IOException {
+    options.forcePositionalEvolution(true);
+    options.positionalEvolutionLevel(Integer.MAX_VALUE);
+    TypeDescription file = TypeDescription.fromString("array<struct<x:int,y:int,z:int>>");
+    TypeDescription read = TypeDescription.fromString("array<struct<z:int,x:int,a:int,b:int>>");
+    SchemaEvolution evo = new SchemaEvolution(file, read, options);
+    assertEquals(1, evo.getFileType(1).getId());
+    assertEquals(2, evo.getFileType(2).getId());
+    assertEquals(3, evo.getFileType(3).getId());
+    assertEquals(4, evo.getFileType(4).getId());
+    assertEquals(null, evo.getFileType(5));
+  }
+
+  @Test
+  public void testPositionalEvolutionForTwoLayerNestedStruct() throws IOException {
+    options.forcePositionalEvolution(true);
+    options.positionalEvolutionLevel(Integer.MAX_VALUE);
+    TypeDescription file = TypeDescription.fromString("struct<s:struct<x:int,y:int,z:int>>");
+    TypeDescription read = TypeDescription.fromString("struct<s:struct<z:int,x:int,a:int,b:int>>");
+    SchemaEvolution evo = new SchemaEvolution(file, read, options);
+    assertEquals(1, evo.getFileType(1).getId());
+    assertEquals(2, evo.getFileType(2).getId());
+    assertEquals(3, evo.getFileType(3).getId());
+    assertEquals(4, evo.getFileType(4).getId());
+    assertNull(evo.getFileType(5));
+  }
+
+  @Test
+  public void testPositionalEvolutionForThreeLayerNestedStruct() throws IOException {
+    options.forcePositionalEvolution(true);
+    options.positionalEvolutionLevel(Integer.MAX_VALUE);
+    TypeDescription file = TypeDescription.fromString("struct<s1:struct<s2:struct<x:int,y:int,z:int>>>");
+    TypeDescription read = TypeDescription.fromString("struct<s1:struct<s:struct<z:int,x:int,a:int,b:int>>>");
+    SchemaEvolution evo = new SchemaEvolution(file, read, options);
+    assertEquals(1, evo.getFileType(1).getId());
+    assertEquals(2, evo.getFileType(2).getId());
+    assertEquals(3, evo.getFileType(3).getId());
+    assertEquals(4, evo.getFileType(4).getId());
+    assertEquals(5, evo.getFileType(5).getId());
+    assertNull(evo.getFileType(6));
+  }
+
   // These are helper methods that pull some of the common code into one
   // place.