You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by ek...@apache.org on 2018/07/26 00:49:35 UTC
orc git commit: ORC-389: Add ability to not decode Acid metadata columns

Repository: orc
Updated Branches:
  refs/heads/branch-1.5 739992586 -> 55db9ada2


ORC-389: Add ability to not decode Acid metadata columns

Fixes #295

Signed-off-by: Eugene Koifman <ek...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/55db9ada
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/55db9ada
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/55db9ada

Branch: refs/heads/branch-1.5
Commit: 55db9ada26781d7463ea127fcb327ee986a47c68
Parents: 7399925
Author: Eugene Koifman <ek...@apache.org>
Authored: Wed Jul 25 17:44:03 2018 -0700
Committer: Eugene Koifman <ek...@apache.org>
Committed: Wed Jul 25 17:48:49 2018 -0700

----------------------------------------------------------------------
 java/core/src/java/org/apache/orc/Reader.java   | 15 +++++++-
 .../org/apache/orc/impl/SchemaEvolution.java    | 22 +++++++++--
 .../apache/orc/impl/TestSchemaEvolution.java    | 39 ++++++++++++++++++--
 3 files changed, 68 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/55db9ada/java/core/src/java/org/apache/orc/Reader.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 58efda0..a90b6c4 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -162,6 +162,7 @@ public interface Reader {
     private boolean forcePositionalEvolution;
     private boolean isSchemaEvolutionCaseAware =
         (boolean) OrcConf.IS_SCHEMA_EVOLUTION_CASE_SENSITIVE.getDefaultValue();
+    private boolean includeAcidColumns = true;
 
     public Options() {
       // PASS
@@ -276,7 +277,14 @@ public interface Reader {
       this.isSchemaEvolutionCaseAware = value;
       return this;
     }
-
+    /**
+     * {@code true} if acid metadata columns should be decoded; otherwise they
+     * will be set to {@code null}.
+     */
+    public Options includeAcidColumns(boolean includeAcidColumns) {
+      this.includeAcidColumns = includeAcidColumns;
+      return this;
+    }
     public boolean[] getInclude() {
       return include;
     }
@@ -329,6 +337,10 @@ public interface Reader {
       return isSchemaEvolutionCaseAware;
     }
 
+    public boolean getIncludeAcidColumns() {
+      return includeAcidColumns;
+    }
+
     public Options clone() {
       try {
         Options result = (Options) super.clone();
@@ -379,6 +391,7 @@ public interface Reader {
         buffer.append(", schema: ");
         schema.printToBuffer(buffer);
       }
+      buffer.append(", includeAcidColumns: ").append(includeAcidColumns);
       buffer.append("}");
       return buffer.toString();
     }

http://git-wip-us.apache.org/repos/asf/orc/blob/55db9ada/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index 8128308..480f13a 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -50,6 +50,11 @@ public class SchemaEvolution {
   private boolean isOnlyImplicitConversion;
   private final boolean isAcid;
   private final boolean isSchemaEvolutionCaseAware;
+  /**
+   * {@code true} if acid metadata columns should be decoded; otherwise they
+   * will be set to {@code null}.  See {@link #acidEventFieldNames}.
+   */
+  private final boolean includeAcidColumns;
 
   // indexed by reader column id
   private final boolean[] ppdSafeConversion;
@@ -65,7 +70,6 @@ public class SchemaEvolution {
       super(msg);
     }
   }
-
   public SchemaEvolution(TypeDescription fileSchema,
                          TypeDescription readerSchema,
                          Reader.Options options) {
@@ -79,6 +83,7 @@ public class SchemaEvolution {
     this.isOnlyImplicitConversion = true;
     this.fileSchema = fileSchema;
     isAcid = checkAcidSchema(fileSchema);
+    includeAcidColumns = options.getIncludeAcidColumns();
     this.readerColumnOffset = isAcid ? acidEventFieldNames.size() : 0;
     if (readerSchema != null) {
       if (isAcid) {
@@ -402,9 +407,18 @@ public class SchemaEvolution {
    * @return true if the column should be read
    */
   public boolean includeReaderColumn(int readerId) {
-    return readerIncluded == null ||
-        readerId <= readerColumnOffset ||
-        readerIncluded[readerId - readerColumnOffset];
+    if(readerId == 0) {
+      //always want top level struct - everything is its child
+      return true;
+    }
+    if(isAcid) {
+      if(readerId < readerColumnOffset) {
+        return includeAcidColumns;
+      }
+      return readerIncluded == null ||
+          readerIncluded[readerId - readerColumnOffset];
+    }
+    return readerIncluded == null || readerIncluded[readerId];
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/orc/blob/55db9ada/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index d203415..4edc1b4 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -18,9 +18,7 @@
 package org.apache.orc.impl;
 
 import static junit.framework.TestCase.assertSame;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.*;
 
 import java.io.File;
 import java.io.IOException;
@@ -1448,6 +1446,41 @@ public class TestSchemaEvolution {
   }
 
   @Test
+  public void testAcidPositionEvolutionSkipAcid() {
+    TypeDescription fileType = TypeDescription.fromString(
+        "struct<operation:int,originalTransaction:bigint,bucket:int," +
+            "rowId:bigint,currentTransaction:bigint," +
+            "row:struct<_col0:int,_col1:string,_col2:double>>");
+    TypeDescription readerType = TypeDescription.fromString(
+        "struct<x:int,y:string>");
+    SchemaEvolution evo = new SchemaEvolution(fileType, readerType,
+        options.includeAcidColumns(false));
+    assertTrue(evo.isAcid());
+    assertEquals("struct<operation:int,originalTransaction:bigint,bucket:int," +
+        "rowId:bigint,currentTransaction:bigint," +
+        "row:struct<x:int,y:string>>", evo.getReaderSchema().toString());
+    assertEquals("struct<x:int,y:string>",
+        evo.getReaderBaseSchema().toString());
+    // the root and row columns map to the same file ids; acid metadata is skipped
+    boolean[] fileInclude = evo.getFileIncluded();
+
+    //get top level struct col
+    assertEquals("column " + 0, 0, evo.getFileType(0).getId());
+    assertTrue("column " + 0, fileInclude[0]);
+    for(int c=1; c < 6; ++c) {
+      assertNull("column " + c, evo.getFileType(c));
+      //skip all acid metadata columns
+      assertFalse("column " + c, fileInclude[c]);
+    }
+    for(int c=6; c < 9; ++c) {
+      assertEquals("column " + c, c, evo.getFileType(c).getId());
+      assertTrue("column " + c, fileInclude[c]);
+    }
+    // don't read the last column
+    assertFalse(fileInclude[9]);
+  }
+
+  @Test
   public void testAcidPositionEvolutionRemoveField() {
     TypeDescription fileType = TypeDescription.fromString(
         "struct<operation:int,originalTransaction:bigint,bucket:int," +