You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by pr...@apache.org on 2018/06/21 07:23:09 UTC

orc git commit: ORC-380: Add isOnlyImplicitConversion boolean function to SchemaEvolution

Repository: orc
Updated Branches:
  refs/heads/master 388a61a64 -> f63c82753


ORC-380: Add isOnlyImplicitConversion boolean function to SchemaEvolution

Fixes #286


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/f63c8275
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/f63c8275
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/f63c8275

Branch: refs/heads/master
Commit: f63c82753119575564f03a7292e6d4576ec1f5cd
Parents: 388a61a
Author: Matt McCline <mm...@hortonworks.com>
Authored: Wed Jun 20 19:10:17 2018 -0500
Committer: Prasanth Jayachandran <pr...@apache.org>
Committed: Thu Jun 21 00:22:31 2018 -0700

----------------------------------------------------------------------
 .../org/apache/orc/impl/SchemaEvolution.java    |  77 ++++++++-
 .../apache/orc/impl/TestSchemaEvolution.java    | 172 +++++++++++++++++++
 2 files changed, 248 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/f63c8275/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index 571c0d9..8128308 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -46,7 +46,8 @@ public class SchemaEvolution {
   private final boolean[] fileIncluded;
   private final TypeDescription fileSchema;
   private final TypeDescription readerSchema;
-  private boolean hasConversion = false;
+  private boolean hasConversion;
+  private boolean isOnlyImplicitConversion;
   private final boolean isAcid;
   private final boolean isSchemaEvolutionCaseAware;
 
@@ -75,6 +76,7 @@ public class SchemaEvolution {
       Arrays.copyOf(includedCols, includedCols.length);
     this.fileIncluded = new boolean[fileSchema.getMaximumId() + 1];
     this.hasConversion = false;
+    this.isOnlyImplicitConversion = true;
     this.fileSchema = fileSchema;
     isAcid = checkAcidSchema(fileSchema);
     this.readerColumnOffset = isAcid ? acidEventFieldNames.size() : 0;
@@ -186,6 +188,17 @@ public class SchemaEvolution {
     return hasConversion;
   }
 
+  /**
+   * When there Schema Evolution data type conversion i.e. hasConversion() returns true,
+   * is the conversion only the implicit kind?
+   *
+   * (see aaa).
+   * @return
+   */
+  public boolean isOnlyImplicitConversion() {
+    return isOnlyImplicitConversion;
+  }
+
   public TypeDescription getFileSchema() {
     return fileSchema;
   }
@@ -220,6 +233,59 @@ public class SchemaEvolution {
   }
 
   /**
+   * Determine if there is implicit conversion from a file to reader type.
+   *
+   * Implicit conversions are:
+   *   Small to larger integer (e.g. INT to LONG)
+   *   FLOAT to DOUBLE
+   *   Some String Family conversions.
+   *
+   * NOTE: This check is independent of the PPD conversion checks.
+   * @return
+   */
+  private boolean typesAreImplicitConversion(final TypeDescription fileType,
+      final TypeDescription readerType) {
+    switch (fileType.getCategory()) {
+    case BYTE:
+        if (readerType.getCategory().equals(TypeDescription.Category.SHORT) ||
+            readerType.getCategory().equals(TypeDescription.Category.INT) ||
+            readerType.getCategory().equals(TypeDescription.Category.LONG)) {
+          return true;
+        }
+        break;
+    case SHORT:
+        if (readerType.getCategory().equals(TypeDescription.Category.INT) ||
+            readerType.getCategory().equals(TypeDescription.Category.LONG)) {
+          return true;
+        }
+        break;
+    case INT:
+        if (readerType.getCategory().equals(TypeDescription.Category.LONG)) {
+          return true;
+        }
+        break;
+    case FLOAT:
+        if (readerType.getCategory().equals(TypeDescription.Category.DOUBLE)) {
+          return true;
+        }
+        break;
+    case CHAR:
+    case VARCHAR:
+        if (readerType.getCategory().equals(TypeDescription.Category.STRING)) {
+          return true;
+        }
+        if (readerType.getCategory().equals(TypeDescription.Category.CHAR) ||
+            readerType.getCategory().equals(TypeDescription.Category.VARCHAR)) {
+          return (fileType.getMaxLength() <= readerType.getMaxLength());
+        }
+        break;
+    default:
+        break;
+    }
+    return false;
+  }
+
+  /**
    * Check if column is safe for ppd evaluation
    * @param colId reader column id
    * @return true if the specified column is safe for ppd evaluation else false
@@ -382,6 +448,9 @@ public class SchemaEvolution {
           // maxLength.
           if (fileType.getMaxLength() != readerType.getMaxLength()) {
             hasConversion = true;
+            if (!typesAreImplicitConversion(fileType, readerType)) {
+              isOnlyImplicitConversion = false;
+            }
           }
           break;
         case DECIMAL:
@@ -390,6 +459,7 @@ public class SchemaEvolution {
           if (fileType.getPrecision() != readerType.getPrecision() ||
               fileType.getScale() != readerType.getScale()) {
             hasConversion = true;
+            isOnlyImplicitConversion = false;
           }
           break;
         case UNION:
@@ -413,6 +483,8 @@ public class SchemaEvolution {
           List<TypeDescription> fileChildren = fileType.getChildren();
           if (fileChildren.size() != readerChildren.size()) {
             hasConversion = true;
+            // UNDONE: Does LLAP detect fewer columns and NULL them out????
+            isOnlyImplicitConversion = false;
           }
 
           if (positionalLevels == 0) {
@@ -461,6 +533,9 @@ public class SchemaEvolution {
 
       isOk = ConvertTreeReaderFactory.canConvert(fileType, readerType);
       hasConversion = true;
+      if (!typesAreImplicitConversion(fileType, readerType)) {
+        isOnlyImplicitConversion = false;
+      }
     }
     if (isOk) {
       readerFileTypes[readerType.getId()] = fileType;

http://git-wip-us.apache.org/repos/asf/orc/blob/f63c8275/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index e452e57..d203415 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -97,6 +97,7 @@ public class TestSchemaEvolution {
         .addField("f3", TypeDescription.createDecimal().withPrecision(38).withScale(10));
     SchemaEvolution both1diff = new SchemaEvolution(fileStruct1, readerStruct1diff, options);
     assertTrue(both1diff.hasConversion());
+    assertTrue(both1diff.isOnlyImplicitConversion());
     TypeDescription readerStruct1diffPrecision = TypeDescription.createStruct()
         .addField("f1", TypeDescription.createInt())
         .addField("f2", TypeDescription.createString())
@@ -104,6 +105,7 @@ public class TestSchemaEvolution {
     SchemaEvolution both1diffPrecision = new SchemaEvolution(fileStruct1,
         readerStruct1diffPrecision, options);
     assertTrue(both1diffPrecision.hasConversion());
+    assertFalse(both1diffPrecision.isOnlyImplicitConversion());
   }
 
   @Test
@@ -144,6 +146,7 @@ public class TestSchemaEvolution {
         .addField("f6", TypeDescription.createChar().withMaxLength(100));
     SchemaEvolution both2diff = new SchemaEvolution(fileStruct2, readerStruct2diff, options);
     assertTrue(both2diff.hasConversion());
+    assertFalse(both2diff.isOnlyImplicitConversion());
     TypeDescription readerStruct2diffChar = TypeDescription.createStruct()
         .addField("f1", TypeDescription.createUnion()
             .addUnionChild(TypeDescription.createByte())
@@ -156,6 +159,175 @@ public class TestSchemaEvolution {
         .addField("f6", TypeDescription.createChar().withMaxLength(80));
     SchemaEvolution both2diffChar = new SchemaEvolution(fileStruct2, readerStruct2diffChar, options);
     assertTrue(both2diffChar.hasConversion());
+    assertFalse(both2diffChar.isOnlyImplicitConversion());
+  }
+
+  @Test
+  public void testIntegerImplicitConversion() throws IOException {
+    TypeDescription fileStructByte = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createByte())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution sameByte = new SchemaEvolution(fileStructByte, null, options);
+    assertFalse(sameByte.hasConversion());
+    TypeDescription readerStructByte = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createByte())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothByte = new SchemaEvolution(fileStructByte, readerStructByte, options);
+    assertFalse(bothByte.hasConversion());
+    TypeDescription readerStructByte1diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createShort())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothByte1diff = new SchemaEvolution(fileStructByte, readerStructByte1diff, options);
+    assertTrue(bothByte1diff.hasConversion());
+    assertTrue(bothByte1diff.isOnlyImplicitConversion());
+    TypeDescription readerStructByte2diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothByte2diff = new SchemaEvolution(fileStructByte, readerStructByte2diff, options);
+    assertTrue(bothByte2diff.hasConversion());
+    assertTrue(bothByte2diff.isOnlyImplicitConversion());
+    TypeDescription readerStruct3diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createLong())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothByte3diff = new SchemaEvolution(fileStructByte, readerStruct3diff, options);
+    assertTrue(bothByte3diff.hasConversion());
+    assertTrue(bothByte3diff.isOnlyImplicitConversion());
+
+    TypeDescription fileStructShort = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createShort())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution sameShort = new SchemaEvolution(fileStructShort, null, options);
+    assertFalse(sameShort.hasConversion());
+    TypeDescription readerStructShort = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createShort())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothShort = new SchemaEvolution(fileStructShort, readerStructShort, options);
+    assertFalse(bothShort.hasConversion());
+    TypeDescription readerStructShort1diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothShort1diff = new SchemaEvolution(fileStructShort, readerStructShort1diff, options);
+    assertTrue(bothShort1diff.hasConversion());
+    assertTrue(bothShort1diff.isOnlyImplicitConversion());
+    TypeDescription readerStructShort2diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createLong())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothShort2diff = new SchemaEvolution(fileStructShort, readerStructShort2diff, options);
+    assertTrue(bothShort2diff.hasConversion());
+    assertTrue(bothShort2diff.isOnlyImplicitConversion());
+
+    TypeDescription fileStructInt = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution sameInt = new SchemaEvolution(fileStructInt, null, options);
+    assertFalse(sameInt.hasConversion());
+    TypeDescription readerStructInt = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createInt())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothInt = new SchemaEvolution(fileStructInt, readerStructInt, options);
+    assertFalse(bothInt.hasConversion());
+    TypeDescription readerStructInt1diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createLong())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothInt1diff = new SchemaEvolution(fileStructInt, readerStructInt1diff, options);
+    assertTrue(bothInt1diff.hasConversion());
+    assertTrue(bothInt1diff.isOnlyImplicitConversion());
+  }
+
+  @Test
+  public void testFloatImplicitConversion() throws IOException {
+    TypeDescription fileStructFloat = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createFloat())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution sameFloat = new SchemaEvolution(fileStructFloat, null, options);
+    assertFalse(sameFloat.hasConversion());
+    TypeDescription readerStructFloat = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createFloat())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothFloat = new SchemaEvolution(fileStructFloat, readerStructFloat, options);
+    assertFalse(bothFloat.hasConversion());
+    TypeDescription readerStructFloat1diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createDouble())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothFloat1diff = new SchemaEvolution(fileStructFloat, readerStructFloat1diff, options);
+    assertTrue(bothFloat1diff.hasConversion());
+    assertTrue(bothFloat1diff.isOnlyImplicitConversion());
+  }
+
+  @Test
+  public void testCharImplicitConversion() throws IOException {
+    TypeDescription fileStructChar = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createChar().withMaxLength(15))
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution sameChar = new SchemaEvolution(fileStructChar, null, options);
+    assertFalse(sameChar.hasConversion());
+    TypeDescription readerStructChar = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createChar().withMaxLength(15))
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothChar = new SchemaEvolution(fileStructChar, readerStructChar, options);
+    assertFalse(bothChar.hasConversion());
+    TypeDescription readerStructChar1diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createString())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothChar1diff = new SchemaEvolution(fileStructChar, readerStructChar1diff, options);
+    assertTrue(bothChar1diff.hasConversion());
+    assertTrue(bothChar1diff.isOnlyImplicitConversion());
+    TypeDescription readerStructChar2diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createChar().withMaxLength(14))
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothChar2diff = new SchemaEvolution(fileStructChar, readerStructChar2diff, options);
+    assertTrue(bothChar2diff.hasConversion());
+    assertFalse(bothChar2diff.isOnlyImplicitConversion());
+    TypeDescription readerStructChar3diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createVarchar().withMaxLength(15))
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothChar3diff = new SchemaEvolution(fileStructChar, readerStructChar3diff, options);
+    assertTrue(bothChar3diff.hasConversion());
+    assertTrue(bothChar3diff.isOnlyImplicitConversion());
+    TypeDescription readerStructChar4diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createVarchar().withMaxLength(14))
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothChar4diff = new SchemaEvolution(fileStructChar, readerStructChar4diff, options);
+    assertTrue(bothChar4diff.hasConversion());
+    assertFalse(bothChar4diff.isOnlyImplicitConversion());
+  }
+
+  @Test
+  public void testVarcharImplicitConversion() throws IOException {
+    TypeDescription fileStructVarchar = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createVarchar().withMaxLength(15))
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution sameVarchar = new SchemaEvolution(fileStructVarchar, null, options);
+    assertFalse(sameVarchar.hasConversion());
+    TypeDescription readerStructVarchar = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createVarchar().withMaxLength(15))
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothVarchar = new SchemaEvolution(fileStructVarchar, readerStructVarchar, options);
+    assertFalse(bothVarchar.hasConversion());
+    TypeDescription readerStructVarchar1diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createString())
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothVarchar1diff = new SchemaEvolution(fileStructVarchar, readerStructVarchar1diff, options);
+    assertTrue(bothVarchar1diff.hasConversion());
+    assertTrue(bothVarchar1diff.isOnlyImplicitConversion());
+    TypeDescription readerStructVarchar2diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createVarchar().withMaxLength(14))
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothVarchar2diff = new SchemaEvolution(fileStructVarchar, readerStructVarchar2diff, options);
+    assertTrue(bothVarchar2diff.hasConversion());
+    assertFalse(bothVarchar2diff.isOnlyImplicitConversion());
+    TypeDescription readerStructVarchar3diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createChar().withMaxLength(15))
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothVarchar3diff = new SchemaEvolution(fileStructVarchar, readerStructVarchar3diff, options);
+    assertTrue(bothVarchar3diff.hasConversion());
+    assertTrue(bothVarchar3diff.isOnlyImplicitConversion());
+    TypeDescription readerStructVarchar4diff = TypeDescription.createStruct()
+        .addField("f1", TypeDescription.createChar().withMaxLength(14))
+        .addField("f2", TypeDescription.createString());
+    SchemaEvolution bothVarchar4diff = new SchemaEvolution(fileStructVarchar, readerStructVarchar4diff, options);
+    assertTrue(bothVarchar4diff.hasConversion());
+    assertFalse(bothVarchar4diff.isOnlyImplicitConversion());
   }
 
   @Test