You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by ek...@apache.org on 2018/07/26 00:49:35 UTC
orc git commit: ORC-389: Add ability to not decode Acid metadata columns
Repository: orc
Updated Branches:
refs/heads/branch-1.5 739992586 -> 55db9ada2
ORC-389: Add ability to not decode Acid metadata columns
Fixes #295
Signed-off-by: Eugene Koifman <ek...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/55db9ada
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/55db9ada
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/55db9ada
Branch: refs/heads/branch-1.5
Commit: 55db9ada26781d7463ea127fcb327ee986a47c68
Parents: 7399925
Author: Eugene Koifman <ek...@apache.org>
Authored: Wed Jul 25 17:44:03 2018 -0700
Committer: Eugene Koifman <ek...@apache.org>
Committed: Wed Jul 25 17:48:49 2018 -0700
----------------------------------------------------------------------
java/core/src/java/org/apache/orc/Reader.java | 15 +++++++-
.../org/apache/orc/impl/SchemaEvolution.java | 22 +++++++++--
.../apache/orc/impl/TestSchemaEvolution.java | 39 ++++++++++++++++++--
3 files changed, 68 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/55db9ada/java/core/src/java/org/apache/orc/Reader.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 58efda0..a90b6c4 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -162,6 +162,7 @@ public interface Reader {
private boolean forcePositionalEvolution;
private boolean isSchemaEvolutionCaseAware =
(boolean) OrcConf.IS_SCHEMA_EVOLUTION_CASE_SENSITIVE.getDefaultValue();
+ private boolean includeAcidColumns = true;
public Options() {
// PASS
@@ -276,7 +277,14 @@ public interface Reader {
this.isSchemaEvolutionCaseAware = value;
return this;
}
-
+ /**
+ * {@code true} if acid metadata columns should be decoded otherwise they will
+ * be set to {@code null}.
+ */
+ public Options includeAcidColumns(boolean includeAcidColumns) {
+ this.includeAcidColumns = includeAcidColumns;
+ return this;
+ }
public boolean[] getInclude() {
return include;
}
@@ -329,6 +337,10 @@ public interface Reader {
return isSchemaEvolutionCaseAware;
}
+ public boolean getIncludeAcidColumns() {
+ return includeAcidColumns;
+ }
+
public Options clone() {
try {
Options result = (Options) super.clone();
@@ -379,6 +391,7 @@ public interface Reader {
buffer.append(", schema: ");
schema.printToBuffer(buffer);
}
+ buffer.append(", includeAcidColumns: ").append(includeAcidColumns);
buffer.append("}");
return buffer.toString();
}
http://git-wip-us.apache.org/repos/asf/orc/blob/55db9ada/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index 8128308..480f13a 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -50,6 +50,11 @@ public class SchemaEvolution {
private boolean isOnlyImplicitConversion;
private final boolean isAcid;
private final boolean isSchemaEvolutionCaseAware;
+ /**
+ * {@code true} if acid metadata columns should be decoded otherwise they will
+ * be set to {@code null}. {@link #acidEventFieldNames}.
+ */
+ private final boolean includeAcidColumns;
// indexed by reader column id
private final boolean[] ppdSafeConversion;
@@ -65,7 +70,6 @@ public class SchemaEvolution {
super(msg);
}
}
-
public SchemaEvolution(TypeDescription fileSchema,
TypeDescription readerSchema,
Reader.Options options) {
@@ -79,6 +83,7 @@ public class SchemaEvolution {
this.isOnlyImplicitConversion = true;
this.fileSchema = fileSchema;
isAcid = checkAcidSchema(fileSchema);
+ includeAcidColumns = options.getIncludeAcidColumns();
this.readerColumnOffset = isAcid ? acidEventFieldNames.size() : 0;
if (readerSchema != null) {
if (isAcid) {
@@ -402,9 +407,18 @@ public class SchemaEvolution {
* @return true if the column should be read
*/
public boolean includeReaderColumn(int readerId) {
- return readerIncluded == null ||
- readerId <= readerColumnOffset ||
- readerIncluded[readerId - readerColumnOffset];
+ if(readerId == 0) {
+ //always want top level struct - everything is its child
+ return true;
+ }
+ if(isAcid) {
+ if(readerId < readerColumnOffset) {
+ return includeAcidColumns;
+ }
+ return readerIncluded == null ||
+ readerIncluded[readerId - readerColumnOffset];
+ }
+ return readerIncluded == null || readerIncluded[readerId];
}
/**
http://git-wip-us.apache.org/repos/asf/orc/blob/55db9ada/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index d203415..4edc1b4 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -18,9 +18,7 @@
package org.apache.orc.impl;
import static junit.framework.TestCase.assertSame;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.*;
import java.io.File;
import java.io.IOException;
@@ -1448,6 +1446,41 @@ public class TestSchemaEvolution {
}
@Test
+ public void testAcidPositionEvolutionSkipAcid() {
+ TypeDescription fileType = TypeDescription.fromString(
+ "struct<operation:int,originalTransaction:bigint,bucket:int," +
+ "rowId:bigint,currentTransaction:bigint," +
+ "row:struct<_col0:int,_col1:string,_col2:double>>");
+ TypeDescription readerType = TypeDescription.fromString(
+ "struct<x:int,y:string>");
+ SchemaEvolution evo = new SchemaEvolution(fileType, readerType,
+ options.includeAcidColumns(false));
+ assertTrue(evo.isAcid());
+ assertEquals("struct<operation:int,originalTransaction:bigint,bucket:int," +
+ "rowId:bigint,currentTransaction:bigint," +
+ "row:struct<x:int,y:string>>", evo.getReaderSchema().toString());
+ assertEquals("struct<x:int,y:string>",
+ evo.getReaderBaseSchema().toString());
+ // the first stuff should be an identity
+ boolean[] fileInclude = evo.getFileIncluded();
+
+ //get top level struct col
+ assertEquals("column " + 0, 0, evo.getFileType(0).getId());
+ assertTrue("column " + 0, fileInclude[0]);
+ for(int c=1; c < 6; ++c) {
+ assertNull("column " + c, evo.getFileType(c));
+ //skip all acid metadata columns
+ assertFalse("column " + c, fileInclude[c]);
+ }
+ for(int c=6; c < 9; ++c) {
+ assertEquals("column " + c, c, evo.getFileType(c).getId());
+ assertTrue("column " + c, fileInclude[c]);
+ }
+ // don't read the last column
+ assertFalse(fileInclude[9]);
+ }
+
+ @Test
public void testAcidPositionEvolutionRemoveField() {
TypeDescription fileType = TypeDescription.fromString(
"struct<operation:int,originalTransaction:bigint,bucket:int," +