You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@iceberg.apache.org by GitBox <gi...@apache.org> on 2020/07/26 20:37:42 UTC

[GitHub] [iceberg] rdblue commented on a change in pull request #1235: avro: Abstract AvroWithPartnerSchemaVisitor

rdblue commented on a change in pull request #1235:
URL: https://github.com/apache/iceberg/pull/1235#discussion_r460570755



##########
File path: core/src/main/java/org/apache/iceberg/avro/AvroSchemaWithTypeVisitor.java
##########
@@ -19,117 +19,57 @@
 
 package org.apache.iceberg.avro;
 
-import java.util.Deque;
-import java.util.List;
 import org.apache.avro.Schema;
-import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
-import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.types.Type;
 import org.apache.iceberg.types.Types;
 
-public abstract class AvroSchemaWithTypeVisitor<T> {
+/**
+ * Avro {@link Schema} with expected Iceberg schema visitor. See {@link #schemaEvolution}, this class is for schema
+ * evolution reading. The avro schema can evolve into the expected Iceberg type.
+ */
+public abstract class AvroSchemaWithTypeVisitor<T> extends AvroWithPartnerSchemaVisitor<Type, T> {
   public static <T> T visit(org.apache.iceberg.Schema iSchema, Schema schema, AvroSchemaWithTypeVisitor<T> visitor) {
     return visit(iSchema.asStruct(), schema, visitor);
   }
 
-  public static <T> T visit(Type iType, Schema schema, AvroSchemaWithTypeVisitor<T> visitor) {
-    switch (schema.getType()) {
-      case RECORD:
-        return visitRecord(iType != null ? iType.asStructType() : null, schema, visitor);
-
-      case UNION:
-        return visitUnion(iType, schema, visitor);
-
-      case ARRAY:
-        return visitArray(iType, schema, visitor);
-
-      case MAP:
-        Types.MapType map = iType != null ? iType.asMapType() : null;
-        return visitor.map(map, schema,
-            visit(map != null ? map.valueType() : null, schema.getValueType(), visitor));
-
-      default:
-        return visitor.primitive(iType != null ? iType.asPrimitiveType() : null, schema);
-    }
-  }
-
-  private static <T> T visitRecord(Types.StructType struct, Schema record, AvroSchemaWithTypeVisitor<T> visitor) {
-    // check to make sure this hasn't been visited before
-    String name = record.getFullName();
-    Preconditions.checkState(!visitor.recordLevels.contains(name),
-        "Cannot process recursive Avro record %s", name);
-
-    visitor.recordLevels.push(name);
-
-    List<Schema.Field> fields = record.getFields();
-    List<String> names = Lists.newArrayListWithExpectedSize(fields.size());
-    List<T> results = Lists.newArrayListWithExpectedSize(fields.size());
-    for (Schema.Field field : fields) {
-      int fieldId = AvroSchemaUtil.getFieldId(field);
-      Types.NestedField iField = struct != null ? struct.field(fieldId) : null;
-      names.add(field.name());
-      results.add(visit(iField != null ? iField.type() : null, field.schema(), visitor));
-    }
-
-    visitor.recordLevels.pop();
-
-    return visitor.record(struct, record, names, results);
-  }
-
-  private static <T> T visitUnion(Type type, Schema union, AvroSchemaWithTypeVisitor<T> visitor) {
-    List<Schema> types = union.getTypes();
-    List<T> options = Lists.newArrayListWithExpectedSize(types.size());
-    for (Schema branch : types) {
-      if (branch.getType() == Schema.Type.NULL) {
-        options.add(visit((Type) null, branch, visitor));
-      } else {
-        options.add(visit(type, branch, visitor));
-      }
-    }
-    return visitor.union(type, union, options);
+  @Override
+  public boolean schemaEvolution() {
+    return true;
   }
 
-  private static <T> T visitArray(Type type, Schema array, AvroSchemaWithTypeVisitor<T> visitor) {
-    if (array.getLogicalType() instanceof LogicalMap || (type != null && type.isMapType())) {
-      Preconditions.checkState(
-          AvroSchemaUtil.isKeyValueSchema(array.getElementType()),
-          "Cannot visit invalid logical map type: %s", array);
-      Types.MapType map = type != null ? type.asMapType() : null;
-      List<Schema.Field> keyValueFields = array.getElementType().getFields();
-      return visitor.map(map, array,
-          visit(map != null ? map.keyType() : null, keyValueFields.get(0).schema(), visitor),
-          visit(map != null ? map.valueType() : null, keyValueFields.get(1).schema(), visitor));
-
-    } else {
-      Types.ListType list = type != null ? type.asListType() : null;
-      return visitor.array(list, array,
-          visit(list != null ? list.elementType() : null, array.getElementType(), visitor));
-    }
+  @Override
+  public boolean isMapType(Type type) {
+    return type != null && type.isMapType();
   }
 
-  private Deque<String> recordLevels = Lists.newLinkedList();
-
-  public T record(Types.StructType iStruct, Schema record, List<String> names, List<T> fields) {
-    return null;
+  @Override
+  public boolean isValidMapKey(Type type) {
+    return type == null || type instanceof Types.StringType;
   }
 
-  public T union(Type iType, Schema union, List<T> options) {
-    return null;
+  @Override
+  public Type arrayElementType(Type arrayType) {
+    return arrayType == null ? null : arrayType.asListType().elementType();
   }
 
-  public T array(Types.ListType iList, Schema array, T element) {
-    return null;
+  @Override
+  public Type mapKeyType(Type mapType) {
+    return mapType == null ? null : mapType.asMapType().keyType();
   }
 
-  public T map(Types.MapType iMap, Schema map, T key, T value) {
-    return null;
+  @Override
+  public Type mapValueType(Type mapType) {
+    return mapType == null ? null : mapType.asMapType().valueType();

Review comment:
       If this defined `isNullType`, then the visit method could do these checks instead:
   
   ```java
     if (isNullType(mapType)) {
       return nullType();
     } else {
       return mapValueType(mapType);
     }
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org