You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2020/08/21 18:06:09 UTC
[orc] 01/03: ORC-626: Reading Struct Column Having Multiple Fields
With Same Name Causes java.io.EOFException
This is an automated email from the ASF dual-hosted git repository.
omalley pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git
commit c9cf49e6f8b08830c123a03c9984ae9272ddc3b8
Author: Syed Shameerur Rahman <sr...@qubole.com>
AuthorDate: Sun May 3 13:45:05 2020 +0530
ORC-626: Reading Struct Column Having Multiple Fields With Same Name Causes java.io.EOFException
Fixes #505
Signed-off-by: Owen O'Malley <om...@apache.org>
---
java/core/src/java/org/apache/orc/OrcConf.java | 4 ++++
java/core/src/java/org/apache/orc/Reader.java | 18 ++++++++++++++++++
.../src/java/org/apache/orc/impl/SchemaEvolution.java | 5 ++---
.../test/org/apache/orc/impl/TestSchemaEvolution.java | 13 +++++++++++++
4 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java
index 6586937..76d480b 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -147,6 +147,10 @@ public enum OrcConf {
"Require schema evolution to match the top level columns using position\n" +
"rather than column names. This provides backwards compatibility with\n" +
"Hive 2.1."),
+ FORCE_POSITIONAL_EVOLUTION_LEVEL("orc.force.positional.evolution.level",
+ "orc.force.positional.evolution.level", 1,
+ "Require schema evolution to match the defined no. of level columns using position\n" +
+ "rather than column names. This provides backwards compatibility with Hive 2.1."),
ROWS_BETWEEN_CHECKS("orc.rows.between.memory.checks", "orc.rows.between.memory.checks", 5000,
"How often should MemoryManager check the memory sizes? Measured in rows\n" +
"added to all of the writers. Valid range is [1,10000] and is primarily meant for" +
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 2c61234..2117b45 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -153,6 +153,7 @@ public interface Reader extends Closeable {
private boolean[] include;
private long offset = 0;
private long length = Long.MAX_VALUE;
+ private int positionalEvolutionLevel;
private SearchArgument sarg = null;
private String[] columnNames = null;
private Boolean useZeroCopy = null;
@@ -174,6 +175,7 @@ public interface Reader extends Closeable {
skipCorruptRecords = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
tolerateMissingSchema = OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf);
forcePositionalEvolution = OrcConf.FORCE_POSITIONAL_EVOLUTION.getBoolean(conf);
+ positionalEvolutionLevel = OrcConf.FORCE_POSITIONAL_EVOLUTION_LEVEL.getInt(conf);
isSchemaEvolutionCaseAware =
OrcConf.IS_SCHEMA_EVOLUTION_CASE_SENSITIVE.getBoolean(conf);
}
@@ -269,6 +271,18 @@ public interface Reader extends Closeable {
}
/**
+ * Set the number of schema levels for which schema evolution is forced to be
+ * positional instead of based on the column names.
+ * @param value the number of levels to match by position
+ * @return this
+ */
+ public Options positionalEvolutionLevel(int value) {
+ this.positionalEvolutionLevel = value;
+ return this;
+ }
+
+
+ /**
* Set boolean flag to determine if the comparison of field names in schema
* evolution is case sensitive
* @param value the flag for schema evolution is case sensitive or not.
@@ -334,6 +348,10 @@ public interface Reader extends Closeable {
return forcePositionalEvolution;
}
+ public int getPositionalEvolutionLevel() {
+ return positionalEvolutionLevel; // level count read from the conf or set via positionalEvolutionLevel(int)
+ }
+
public boolean getIsSchemaEvolutionCaseAware() {
return isSchemaEvolutionCaseAware;
}
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index e570035..b70c0e4 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -103,8 +103,7 @@ public class SchemaEvolution {
new TypeDescription[this.readerSchema.getMaximumId() + 1];
int positionalLevels = 0;
if (options.getForcePositionalEvolution()) {
- positionalLevels = isAcid ? 2 : 1;
- buildConversion(fileSchema, this.readerSchema, positionalLevels);
+ positionalLevels = isAcid ? 2 : options.getPositionalEvolutionLevel();
} else if (!hasColumnNames(isAcid? getBaseRow(fileSchema) : fileSchema)) {
if (!this.fileSchema.equals(this.readerSchema)) {
if (!allowMissingMetadata) {
@@ -117,7 +116,7 @@ public class SchemaEvolution {
+ " caused by a writer earlier than HIVE-4243. The reader will"
+ " reconcile schemas based on index. File type: " +
this.fileSchema + ", reader type: " + this.readerSchema);
- positionalLevels = isAcid ? 2 : 1;
+ positionalLevels = isAcid ? 2 : options.getPositionalEvolutionLevel();
}
}
}
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index 2dd0633..789bf55 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -1703,6 +1703,19 @@ public class TestSchemaEvolution {
assertEquals(null, evo.getFileType(4));
}
+ @Test
+ public void testPositionalEvolutionLevel() throws IOException {
+ options.forcePositionalEvolution(true); // match columns by position rather than by name
+ options.positionalEvolutionLevel(2); // request positional matching for two levels
+ TypeDescription file = TypeDescription.fromString("struct<a:int,b:struct<y:int,y:int>>"); // struct b has two fields both named y
+ TypeDescription read = TypeDescription.fromString("struct<a:int,b:struct<y:int,y:int>>"); // reader schema is identical to the file schema
+ SchemaEvolution evo = new SchemaEvolution(file, read, options);
+ assertEquals(1, evo.getFileType(1).getId()); // each reader column id 1..4 must map to the file column with the same id
+ assertEquals(2, evo.getFileType(2).getId());
+ assertEquals(3, evo.getFileType(3).getId());
+ assertEquals(4, evo.getFileType(4).getId());
+ }
+
// These are helper methods that pull some of the common code into one
// place.