You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2020/08/21 18:06:08 UTC
[orc] branch branch-1.5 updated (117c8c5 -> 78a3c18)
This is an automated email from the ASF dual-hosted git repository.
omalley pushed a change to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git.
from 117c8c5 ORC-659: Initialize "next_in" before calling DeflateInit2 (#514)
new c9cf49e ORC-626: Reading Struct Column Having Multiple Fields With Same Name Causes java.io.EOFException
new bad383e ORC-644: Support positional mapping for nested types.
new 78a3c18 Fix Julian/Gregorian cutoff in a comment
The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
java/core/src/java/org/apache/orc/OrcConf.java | 4 ++
java/core/src/java/org/apache/orc/Reader.java | 18 +++++++
.../java/org/apache/orc/impl/SchemaEvolution.java | 7 ++-
.../org/apache/orc/impl/TestSchemaEvolution.java | 56 ++++++++++++++++++++++
proto/orc_proto.proto | 3 +-
5 files changed, 82 insertions(+), 6 deletions(-)
[orc] 01/03: ORC-626: Reading Struct Column Having Multiple Fields
With Same Name Causes java.io.EOFException
Posted by om...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
omalley pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git
commit c9cf49e6f8b08830c123a03c9984ae9272ddc3b8
Author: Syed Shameerur Rahman <sr...@qubole.com>
AuthorDate: Sun May 3 13:45:05 2020 +0530
ORC-626: Reading Struct Column Having Multiple Fields With Same Name Causes java.io.EOFException
Fixes #505
Signed-off-by: Owen O'Malley <om...@apache.org>
---
java/core/src/java/org/apache/orc/OrcConf.java | 4 ++++
java/core/src/java/org/apache/orc/Reader.java | 18 ++++++++++++++++++
.../src/java/org/apache/orc/impl/SchemaEvolution.java | 5 ++---
.../test/org/apache/orc/impl/TestSchemaEvolution.java | 13 +++++++++++++
4 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java
index 6586937..76d480b 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -147,6 +147,10 @@ public enum OrcConf {
"Require schema evolution to match the top level columns using position\n" +
"rather than column names. This provides backwards compatibility with\n" +
"Hive 2.1."),
+ FORCE_POSITIONAL_EVOLUTION_LEVEL("orc.force.positional.evolution.level",
+ "orc.force.positional.evolution.level", 1,
+ "Require schema evolution to match the the defined no. of level columns using position\n" +
+ "rather than column names. This provides backwards compatibility with Hive 2.1."),
ROWS_BETWEEN_CHECKS("orc.rows.between.memory.checks", "orc.rows.between.memory.checks", 5000,
"How often should MemoryManager check the memory sizes? Measured in rows\n" +
"added to all of the writers. Valid range is [1,10000] and is primarily meant for" +
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 2c61234..2117b45 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -153,6 +153,7 @@ public interface Reader extends Closeable {
private boolean[] include;
private long offset = 0;
private long length = Long.MAX_VALUE;
+ private int positionalEvolutionLevel;
private SearchArgument sarg = null;
private String[] columnNames = null;
private Boolean useZeroCopy = null;
@@ -174,6 +175,7 @@ public interface Reader extends Closeable {
skipCorruptRecords = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
tolerateMissingSchema = OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf);
forcePositionalEvolution = OrcConf.FORCE_POSITIONAL_EVOLUTION.getBoolean(conf);
+ positionalEvolutionLevel = OrcConf.FORCE_POSITIONAL_EVOLUTION_LEVEL.getInt(conf);
isSchemaEvolutionCaseAware =
OrcConf.IS_SCHEMA_EVOLUTION_CASE_SENSITIVE.getBoolean(conf);
}
@@ -269,6 +271,18 @@ public interface Reader extends Closeable {
}
/**
+ * Set no. of levels to force schema evolution to be positional instead of
+ * based on the column names.
+ * @param value force positional evolution
+ * @return this
+ */
+ public Options positionalEvolutionLevel(int value) {
+ this.positionalEvolutionLevel = value;
+ return this;
+ }
+
+
+ /**
* Set boolean flag to determine if the comparison of field names in schema
* evolution is case sensitive
* @param value the flag for schema evolution is case sensitive or not.
@@ -334,6 +348,10 @@ public interface Reader extends Closeable {
return forcePositionalEvolution;
}
+ public int getPositionalEvolutionLevel() {
+ return positionalEvolutionLevel;
+ }
+
public boolean getIsSchemaEvolutionCaseAware() {
return isSchemaEvolutionCaseAware;
}
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index e570035..b70c0e4 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -103,8 +103,7 @@ public class SchemaEvolution {
new TypeDescription[this.readerSchema.getMaximumId() + 1];
int positionalLevels = 0;
if (options.getForcePositionalEvolution()) {
- positionalLevels = isAcid ? 2 : 1;
- buildConversion(fileSchema, this.readerSchema, positionalLevels);
+ positionalLevels = isAcid ? 2 : options.getPositionalEvolutionLevel();
} else if (!hasColumnNames(isAcid? getBaseRow(fileSchema) : fileSchema)) {
if (!this.fileSchema.equals(this.readerSchema)) {
if (!allowMissingMetadata) {
@@ -117,7 +116,7 @@ public class SchemaEvolution {
+ " caused by a writer earlier than HIVE-4243. The reader will"
+ " reconcile schemas based on index. File type: " +
this.fileSchema + ", reader type: " + this.readerSchema);
- positionalLevels = isAcid ? 2 : 1;
+ positionalLevels = isAcid ? 2 : options.getPositionalEvolutionLevel();
}
}
}
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index 2dd0633..789bf55 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -1703,6 +1703,19 @@ public class TestSchemaEvolution {
assertEquals(null, evo.getFileType(4));
}
+ @Test
+ public void testPositionalEvolutionLevel() throws IOException {
+ options.forcePositionalEvolution(true);
+ options.positionalEvolutionLevel(2);
+ TypeDescription file = TypeDescription.fromString("struct<a:int,b:struct<y:int,y:int>>");
+ TypeDescription read = TypeDescription.fromString("struct<a:int,b:struct<y:int,y:int>>");
+ SchemaEvolution evo = new SchemaEvolution(file, read, options);
+ assertEquals(1, evo.getFileType(1).getId());
+ assertEquals(2, evo.getFileType(2).getId());
+ assertEquals(3, evo.getFileType(3).getId());
+ assertEquals(4, evo.getFileType(4).getId());
+ }
+
// These are helper methods that pull some of the common code into one
// place.
[orc] 03/03: Fix Julian/Gregorian cutoff in a comment
Posted by om...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
omalley pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git
commit 78a3c18c7c6a00b24ef6229f3e35eaefeb0e0bda
Author: Piotr Findeisen <pi...@gmail.com>
AuthorDate: Thu Aug 13 22:21:34 2020 +0200
Fix Julian/Gregorian cutoff in a comment
Fixes #538
Signed-off-by: Owen O'Malley <om...@apache.org>
---
proto/orc_proto.proto | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto
index 24a62a4..e53c1c4 100644
--- a/proto/orc_proto.proto
+++ b/proto/orc_proto.proto
@@ -196,8 +196,7 @@ message Metadata {
enum CalendarKind {
UNKNOWN_CALENDAR = 0;
- // The Java default calendar changes from Julian to Gregorian
- // in 1583.
+ // A hybrid Julian/Gregorian calendar with a cutover point in October 1582.
JULIAN_GREGORIAN = 1;
// A calendar that extends the Gregorian calendar back forever.
PROLEPTIC_GREGORIAN = 2;
[orc] 02/03: ORC-644: Support positional mapping for nested types.
Posted by om...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
omalley pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git
commit bad383e9074b1597af50e85e77e7b6ce79147e39
Author: Arvin Zheng <zm...@gmail.com>
AuthorDate: Mon Jul 27 17:32:04 2020 -0700
ORC-644: Support positional mapping for nested types.
Resolves #522
Signed-off-by: Owen O'Malley <om...@apache.org>
---
java/core/src/java/org/apache/orc/Reader.java | 4 +-
.../java/org/apache/orc/impl/SchemaEvolution.java | 2 +-
.../org/apache/orc/impl/TestSchemaEvolution.java | 43 ++++++++++++++++++++++
3 files changed, 46 insertions(+), 3 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 2117b45..8403e99 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -271,9 +271,9 @@ public interface Reader extends Closeable {
}
/**
- * Set no. of levels to force schema evolution to be positional instead of
+ * Set number of levels to force schema evolution to be positional instead of
* based on the column names.
- * @param value force positional evolution
+ * @param value number of levels of positional schema evolution
* @return this
*/
public Options positionalEvolutionLevel(int value) {
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index b70c0e4..3184d5e 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -485,7 +485,7 @@ public class SchemaEvolution {
if (fileChildren.size() == readerChildren.size()) {
for(int i=0; i < fileChildren.size(); ++i) {
buildConversion(fileChildren.get(i),
- readerChildren.get(i), 0);
+ readerChildren.get(i), positionalLevels - 1);
}
} else {
isOk = false;
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index 789bf55..f707ce4 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -1716,6 +1716,49 @@ public class TestSchemaEvolution {
assertEquals(4, evo.getFileType(4).getId());
}
+ @Test
+ public void testPositionalEvolutionForStructInArray() throws IOException {
+ options.forcePositionalEvolution(true);
+ options.positionalEvolutionLevel(Integer.MAX_VALUE);
+ TypeDescription file = TypeDescription.fromString("array<struct<x:int,y:int,z:int>>");
+ TypeDescription read = TypeDescription.fromString("array<struct<z:int,x:int,a:int,b:int>>");
+ SchemaEvolution evo = new SchemaEvolution(file, read, options);
+ assertEquals(1, evo.getFileType(1).getId());
+ assertEquals(2, evo.getFileType(2).getId());
+ assertEquals(3, evo.getFileType(3).getId());
+ assertEquals(4, evo.getFileType(4).getId());
+ assertEquals(null, evo.getFileType(5));
+ }
+
+ @Test
+ public void testPositionalEvolutionForTwoLayerNestedStruct() throws IOException {
+ options.forcePositionalEvolution(true);
+ options.positionalEvolutionLevel(Integer.MAX_VALUE);
+ TypeDescription file = TypeDescription.fromString("struct<s:struct<x:int,y:int,z:int>>");
+ TypeDescription read = TypeDescription.fromString("struct<s:struct<z:int,x:int,a:int,b:int>>");
+ SchemaEvolution evo = new SchemaEvolution(file, read, options);
+ assertEquals(1, evo.getFileType(1).getId());
+ assertEquals(2, evo.getFileType(2).getId());
+ assertEquals(3, evo.getFileType(3).getId());
+ assertEquals(4, evo.getFileType(4).getId());
+ assertNull(evo.getFileType(5));
+ }
+
+ @Test
+ public void testPositionalEvolutionForThreeLayerNestedStruct() throws IOException {
+ options.forcePositionalEvolution(true);
+ options.positionalEvolutionLevel(Integer.MAX_VALUE);
+ TypeDescription file = TypeDescription.fromString("struct<s1:struct<s2:struct<x:int,y:int,z:int>>>");
+ TypeDescription read = TypeDescription.fromString("struct<s1:struct<s:struct<z:int,x:int,a:int,b:int>>>");
+ SchemaEvolution evo = new SchemaEvolution(file, read, options);
+ assertEquals(1, evo.getFileType(1).getId());
+ assertEquals(2, evo.getFileType(2).getId());
+ assertEquals(3, evo.getFileType(3).getId());
+ assertEquals(4, evo.getFileType(4).getId());
+ assertEquals(5, evo.getFileType(5).getId());
+ assertNull(evo.getFileType(6));
+ }
+
// These are helper methods that pull some of the common code into one
// place.