You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ti...@apache.org on 2015/07/13 19:36:32 UTC

parquet-mr git commit: PARQUET-279 : Check empty struct in compatibility checker

Repository: parquet-mr
Updated Branches:
  refs/heads/master abfe3559f -> fcd568282


PARQUET-279 : Check empty struct in compatibility checker

Add the empty struct check in the CompatibilityChecker util.
Parquet currently does not support empty struct/group without leaves

Author: Tianshuo Deng <td...@twitter.com>

Closes #194 from tsdeng/check_empty_struct and squashes the following commits:

35d77a1 [Tianshuo Deng] fix rebase
d781cf3 [Tianshuo Deng] simplify constructor
cd2fa8e [Tianshuo Deng] add State
e75a6ac [Tianshuo Deng] use immutable FieldsPath
2bff920 [Tianshuo Deng] fix test
69b4b9c [Tianshuo Deng] minor fixes
2db8c4b [Tianshuo Deng] remove unused println
5107ce2 [Tianshuo Deng] fix comments
265e228 [Tianshuo Deng] wip


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/fcd56828
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/fcd56828
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/fcd56828

Branch: refs/heads/master
Commit: fcd568282b2a150f9f42953f12268dc88d09da89
Parents: abfe355
Author: Tianshuo Deng <td...@twitter.com>
Authored: Mon Jul 13 10:36:18 2015 -0700
Committer: Tianshuo Deng <td...@twitter.com>
Committed: Mon Jul 13 10:36:18 2015 -0700

----------------------------------------------------------------------
 .../main/java/org/apache/parquet/Strings.java   |  14 +-
 .../parquet/thrift/ThriftSchemaConverter.java   |   4 +-
 .../thrift/struct/CompatibilityChecker.java     | 151 ++++++++++++-------
 .../thrift/struct/CompatibilityRunner.java      |   7 +
 .../thrift/struct/CompatibilityCheckerTest.java |  16 +-
 parquet-thrift/src/test/thrift/compat.thrift    |   9 ++
 6 files changed, 144 insertions(+), 57 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/fcd56828/parquet-common/src/main/java/org/apache/parquet/Strings.java
----------------------------------------------------------------------
diff --git a/parquet-common/src/main/java/org/apache/parquet/Strings.java b/parquet-common/src/main/java/org/apache/parquet/Strings.java
index 4d9fcc9..644e26a 100644
--- a/parquet-common/src/main/java/org/apache/parquet/Strings.java
+++ b/parquet-common/src/main/java/org/apache/parquet/Strings.java
@@ -40,7 +40,19 @@ public final class Strings {
    * @return a single joined string
    */
   public static String join(Iterable<String> s, String on) {
-    Iterator<String> iter = s.iterator();
+    return join(s.iterator(), on);
+  }
+
+  /**
+   * Join an Iterator of Strings into a single string with a delimiter.
+   * For example, join(Arrays.asList("foo","","bar","x"), "|") would return
+   * "foo||bar|x"
+   *
+   * @param iter an iterator of strings
+   * @param on the delimiter
+   * @return a single joined string
+   */
+  public static String join(Iterator<String> iter, String on) {
     StringBuilder sb = new StringBuilder();
     while (iter.hasNext()) {
       sb.append(iter.next());

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/fcd56828/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java
index 4ce1e91..95d998b 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java
@@ -124,11 +124,11 @@ public class ThriftSchemaConverter {
         break;
       case SET:
         final Field setElemField = field.getSetElemField();
-        type = new ThriftType.SetType(toThriftField(name, setElemField, requirement));
+        type = new ThriftType.SetType(toThriftField(setElemField.getName(), setElemField, requirement));
         break;
       case LIST:
         final Field listElemField = field.getListElemField();
-        type = new ThriftType.ListType(toThriftField(name, listElemField, requirement));
+        type = new ThriftType.ListType(toThriftField(listElemField.getName(), listElemField, requirement));
         break;
       case ENUM:
         Collection<TEnum> enumValues = field.getEnumValues();

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/fcd56828/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/CompatibilityChecker.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/CompatibilityChecker.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/CompatibilityChecker.java
index 46c26a5..81e0b55 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/CompatibilityChecker.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/CompatibilityChecker.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -19,9 +19,13 @@
 package org.apache.parquet.thrift.struct;
 
 
+import java.util.ArrayDeque;
 import java.util.ArrayList;
+import java.util.Deque;
+import java.util.Iterator;
 import java.util.List;
 
+import org.apache.parquet.thrift.projection.FieldsPath;
 import org.apache.parquet.thrift.struct.ThriftType.BoolType;
 import org.apache.parquet.thrift.struct.ThriftType.ByteType;
 import org.apache.parquet.thrift.struct.ThriftType.DoubleType;
@@ -30,9 +34,10 @@ import org.apache.parquet.thrift.struct.ThriftType.I16Type;
 import org.apache.parquet.thrift.struct.ThriftType.I32Type;
 import org.apache.parquet.thrift.struct.ThriftType.I64Type;
 import org.apache.parquet.thrift.struct.ThriftType.StringType;
+import org.apache.parquet.Strings;
 
 /**
- * A checker for thrift struct, returns compatibility report based on following rules:
+ * A checker for thrift struct to enforce its backward compatibility, returns compatibility report based on following rules:
  * 1. Should not add new REQUIRED field in new thrift struct. Adding optional field is OK
  * 2. Should not change field type for an existing field
  * 3. Should not delete existing field
@@ -43,8 +48,8 @@ import org.apache.parquet.thrift.struct.ThriftType.StringType;
 public class CompatibilityChecker {
 
   public CompatibilityReport checkCompatibility(ThriftType.StructType oldStruct, ThriftType.StructType newStruct) {
-    CompatibleCheckerVisitor visitor = new CompatibleCheckerVisitor(oldStruct);
-    newStruct.accept(visitor);
+    CompatibleCheckerVisitor visitor = new CompatibleCheckerVisitor();
+    newStruct.accept(visitor, new State(oldStruct,new FieldsPath()));
     return visitor.getReport();
   }
 
@@ -52,90 +57,120 @@ public class CompatibilityChecker {
 
 class CompatibilityReport {
   boolean isCompatible = true;
+  boolean hasEmptyStruct = false;
   List<String> messages = new ArrayList<String>();
 
   public boolean isCompatible() {
     return isCompatible;
   }
 
+  public boolean hasEmptyStruct() {
+    return hasEmptyStruct;
+  }
+
   public void fail(String message) {
     messages.add(message);
     isCompatible = false;
   }
 
+  public void emptyStruct(String message) {
+    messages.add(message);
+    hasEmptyStruct = true;
+  }
+
   public List<String> getMessages() {
     return messages;
   }
+
+  public String prettyMessages() {
+
+    return Strings.join(messages, "\n");
+  }
+
+  @Override
+  public String toString() {
+    return "CompatibilityReport{" +
+        "isCompatible=" + isCompatible +
+        ", hasEmptyStruct=" + hasEmptyStruct +
+        ", messages=\n" + prettyMessages() +
+        '}';
+  }
 }
 
-class CompatibleCheckerVisitor implements ThriftType.TypeVisitor {
+class State {
+  FieldsPath path;
   ThriftType oldType;
-  CompatibilityReport report = new CompatibilityReport();
 
-  CompatibleCheckerVisitor(ThriftType.StructType oldType) {
+  public State(ThriftType oldType, FieldsPath fieldsPath) {
+    this.path = fieldsPath;
     this.oldType = oldType;
   }
+}
+
+class CompatibleCheckerVisitor implements ThriftType.StateVisitor<Void, State> {
+
+  CompatibilityReport report = new CompatibilityReport();
 
   public CompatibilityReport getReport() {
     return report;
   }
 
   @Override
-  public void visit(ThriftType.MapType mapType) {
-    ThriftType.MapType currentOldType = ((ThriftType.MapType) oldType);
-    ThriftField oldKeyField = currentOldType.getKey();
+  public Void visit(ThriftType.MapType mapType, State state) {
+    ThriftType.MapType oldMapType = ((ThriftType.MapType) state.oldType);
+    ThriftField oldKeyField = oldMapType.getKey();
     ThriftField newKeyField = mapType.getKey();
 
     ThriftField newValueField = mapType.getValue();
-    ThriftField oldValueField = currentOldType.getValue();
+    ThriftField oldValueField = oldMapType.getValue();
+
 
-    checkField(oldKeyField, newKeyField);
-    checkField(oldValueField, newValueField);
+    checkField(oldKeyField, newKeyField, state.path);
+    checkField(oldValueField, newValueField, state.path);
 
-    oldType = currentOldType;
+    return null;
   }
 
   @Override
-  public void visit(ThriftType.SetType setType) {
-    ThriftType.SetType currentOldType = ((ThriftType.SetType) oldType);
-    ThriftField oldField = currentOldType.getValues();
+  public Void visit(ThriftType.SetType setType, State state) {
+    ThriftType.SetType oldSetType = ((ThriftType.SetType) state.oldType);
+    ThriftField oldField = oldSetType.getValues();
     ThriftField newField = setType.getValues();
-    checkField(oldField, newField);
-    oldType = currentOldType;
+    checkField(oldField, newField, state.path);
+    return null;
   }
 
   @Override
-  public void visit(ThriftType.ListType listType) {
-    ThriftType.ListType currentOldType = ((ThriftType.ListType) oldType);
+  public Void visit(ThriftType.ListType listType, State state) {
+    ThriftType.ListType currentOldType = ((ThriftType.ListType) state.oldType);
     ThriftField oldField = currentOldType.getValues();
     ThriftField newField = listType.getValues();
-    checkField(oldField, newField);
-    oldType = currentOldType;
+    checkField(oldField, newField, state.path);
+    return null;
   }
 
-  public void fail(String message) {
-    report.fail(message);
+  public void incompatible(String message, FieldsPath path) {
+    report.fail("at " + path + ":" +message);
   }
 
-  private void checkField(ThriftField oldField, ThriftField newField) {
+  private void checkField(ThriftField oldField, ThriftField newField, FieldsPath path) {
 
     if (!newField.getType().getType().equals(oldField.getType().getType())) {
-      fail("type is not compatible: " + oldField.getName() + " " + oldField.getType().getType() + " vs " + newField.getType().getType());
+      incompatible("type is not compatible: " + oldField.getType().getType() + " vs " + newField.getType().getType(), path);
       return;
     }
 
     if (!newField.getName().equals(oldField.getName())) {
-      fail("field names are different: " + oldField.getName() + " vs " + newField.getName());
+      incompatible("field names are different: " + oldField.getName() + " vs " + newField.getName(), path);
       return;
     }
 
     if (firstIsMoreRestirctive(newField.getRequirement(), oldField.getRequirement())) {
-      fail("new field is more restrictive: " + newField.getName());
+      incompatible("new field is more restrictive: " + newField.getName(), path);
       return;
     }
 
-    oldType = oldField.getType();
-    newField.getType().accept(this);
+    newField.getType().accept(this, new State(oldField.getType(),path.push(newField)));
   }
 
   private boolean firstIsMoreRestirctive(ThriftField.Requirement firstReq, ThriftField.Requirement secReq) {
@@ -148,20 +183,25 @@ class CompatibleCheckerVisitor implements ThriftType.TypeVisitor {
   }
 
   @Override
-  public void visit(ThriftType.StructType newStruct) {
-    ThriftType.StructType currentOldType = ((ThriftType.StructType) oldType);
+  public Void visit(ThriftType.StructType newStruct, State state) {
+    ThriftType.StructType oldStructType = ((ThriftType.StructType) state.oldType);
     short oldMaxId = 0;
-    for (ThriftField oldField : currentOldType.getChildren()) {
+
+    if (newStruct.getChildren().isEmpty()) {
+      report.emptyStruct("encountered an empty struct: " + state.path);
+    }
+
+    for (ThriftField oldField : oldStructType.getChildren()) {
       short fieldId = oldField.getFieldId();
       if (fieldId > oldMaxId) {
         oldMaxId = fieldId;
       }
       ThriftField newField = newStruct.getChildById(fieldId);
       if (newField == null) {
-        fail("can not find index in new Struct: " + fieldId +" in " + newStruct);
-        return;
+        incompatible("can not find index in new Struct: " + fieldId + " in " + newStruct, state.path);
+        return null;
       }
-      checkField(oldField, newField);
+      checkField(oldField, newField, state.path);
     }
 
     //check for new added
@@ -172,50 +212,57 @@ class CompatibleCheckerVisitor implements ThriftType.TypeVisitor {
 
       short newFieldId = newField.getFieldId();
       if (newFieldId > oldMaxId) {
-        fail("new required field " + newField.getName() + " is added");
-        return;
+        incompatible("new required field " + newField.getName() + " is added", state.path);
+        return null;
       }
-      if (newFieldId < oldMaxId && currentOldType.getChildById(newFieldId) == null) {
-        fail("new required field " + newField.getName() + " is added");
-        return;
+      if (newFieldId < oldMaxId && oldStructType.getChildById(newFieldId) == null) {
+        incompatible("new required field " + newField.getName() + " is added", state.path);
+        return null;
       }
 
     }
 
-    //restore
-    oldType = currentOldType;
+    return null;
   }
 
   @Override
-  public void visit(EnumType enumType) {
+  public Void visit(EnumType enumType, State state) {
+    return null;
   }
 
   @Override
-  public void visit(BoolType boolType) {
+  public Void visit(BoolType boolType, State state) {
+    return null;
   }
 
   @Override
-  public void visit(ByteType byteType) {
+  public Void visit(ByteType byteType, State state) {
+    return null;
   }
 
   @Override
-  public void visit(DoubleType doubleType) {
+  public Void visit(DoubleType doubleType, State state) {
+    return null;
   }
 
   @Override
-  public void visit(I16Type i16Type) {
+  public Void visit(I16Type i16Type, State state) {
+    return null;
   }
 
   @Override
-  public void visit(I32Type i32Type) {
+  public Void visit(I32Type i32Type, State state) {
+    return null;
   }
 
   @Override
-  public void visit(I64Type i64Type) {
+  public Void visit(I64Type i64Type, State state) {
+    return null;
   }
 
   @Override
-  public void visit(StringType stringType) {
+  public Void visit(StringType stringType, State state) {
+    return null;
   }
 }
 

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/fcd56828/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/CompatibilityRunner.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/CompatibilityRunner.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/CompatibilityRunner.java
index 7b55c33..0d05e76 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/CompatibilityRunner.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/CompatibilityRunner.java
@@ -73,6 +73,13 @@ public class CompatibilityRunner {
       System.err.println(report.getMessages());
       System.exit(1);
     }
+
+    if (report.hasEmptyStruct()) {
+      System.err.println("schema contains empty struct");
+      System.err.println(report.getMessages());
+      System.exit(1);
+    }
+
     System.out.println("[success] schema is compatible");
 
   }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/fcd56828/parquet-thrift/src/test/java/org/apache/parquet/thrift/struct/CompatibilityCheckerTest.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/struct/CompatibilityCheckerTest.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/struct/CompatibilityCheckerTest.java
index b35974b..f07aa9d 100644
--- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/struct/CompatibilityCheckerTest.java
+++ b/parquet-thrift/src/test/java/org/apache/parquet/thrift/struct/CompatibilityCheckerTest.java
@@ -23,6 +23,7 @@ import org.apache.parquet.thrift.ThriftSchemaConverter;
 import org.apache.parquet.thrift.test.compat.*;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 public class CompatibilityCheckerTest {
 
@@ -107,14 +108,25 @@ public class CompatibilityCheckerTest {
     verifyCompatible(ListStructV1.class, ListStructV2.class, true);
   }
 
+  @Test
+  public void testEmptyStruct() {
+    CompatibilityReport report = getCompatibilityReport(NestedEmptyStruct.class, NestedEmptyStruct.class);
+    assertEquals("encountered an empty struct: required_empty\nencountered an empty struct: optional_empty",report.prettyMessages());
+    assertTrue(report.hasEmptyStruct());
+  }
+
   private ThriftType.StructType struct(Class thriftClass) {
     return new ThriftSchemaConverter().toStructType(thriftClass);
   }
 
-  private void verifyCompatible(Class oldClass, Class newClass, boolean expectCompatible) {
+  private CompatibilityReport getCompatibilityReport(Class oldClass, Class newClass) {
     CompatibilityChecker checker = new CompatibilityChecker();
     CompatibilityReport report = checker.checkCompatibility(struct(oldClass), struct(newClass));
-    System.out.println(report.messages);
+    return report;
+  }
+
+  private void verifyCompatible(Class oldClass, Class newClass, boolean expectCompatible) {
+    CompatibilityReport report = getCompatibilityReport(oldClass, newClass);
     assertEquals(expectCompatible, report.isCompatible());
   }
 }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/fcd56828/parquet-thrift/src/test/thrift/compat.thrift
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/thrift/compat.thrift b/parquet-thrift/src/test/thrift/compat.thrift
index 63972a8..8c90e23 100644
--- a/parquet-thrift/src/test/thrift/compat.thrift
+++ b/parquet-thrift/src/test/thrift/compat.thrift
@@ -253,3 +253,12 @@ struct ListOfUnions {
   1: optional list<UnionOfStructs> optListUnion
   2: required list<UnionOfStructs> reqListUnion
 }
+
+struct EmptyStruct {
+
+}
+
+struct NestedEmptyStruct {
+  1: required EmptyStruct required_empty
+  2: optional EmptyStruct optional_empty
+}