You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@iceberg.apache.org by GitBox <gi...@apache.org> on 2020/07/06 03:24:35 UTC

[GitHub] [iceberg] edgarRd commented on a change in pull request #1167: ORC: Simplify logic to determine which columns have stats

edgarRd commented on a change in pull request #1167:
URL: https://github.com/apache/iceberg/pull/1167#discussion_r449965598



##########
File path: orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java
##########
@@ -209,64 +206,17 @@ private static Metrics buildOrcMetrics(final long numOfRows, final TypeDescripti
     return Optional.ofNullable(Conversions.toByteBuffer(column.type(), max));
   }
 
-  private static Set<TypeDescription> findColumnsInContainers(Schema schema,
-                                                              TypeDescription orcSchema) {
-    ColumnsInContainersVisitor visitor = new ColumnsInContainersVisitor();
-    OrcSchemaWithTypeVisitor.visit(schema, orcSchema, visitor);
-    return visitor.getColumnsInContainers();
+  private static Set<Integer> statsColumns(TypeDescription schema) {
+    return OrcSchemaVisitor.visit(schema, new StatsColumnsVisitor());
   }
 
-  private static class ColumnsInContainersVisitor extends OrcSchemaWithTypeVisitor<TypeDescription> {
-
-    private final Set<TypeDescription> columnsInContainers;
-
-    private ColumnsInContainersVisitor() {
-      columnsInContainers = Sets.newHashSet();
-    }
-
-    public Set<TypeDescription> getColumnsInContainers() {
-      return columnsInContainers;
-    }
-
-    private Set<TypeDescription> flatten(TypeDescription rootType) {
-      if (rootType == null) {
-        return ImmutableSet.of();
-      }
-
-      final Set<TypeDescription> flatTypes = Sets.newHashSetWithExpectedSize(rootType.getMaximumId());
-      final Queue<TypeDescription> queue = Queues.newLinkedBlockingQueue();
-      queue.add(rootType);
-      while (!queue.isEmpty()) {
-        TypeDescription type = queue.remove();
-        flatTypes.add(type);
-        queue.addAll(Optional.ofNullable(type.getChildren()).orElse(ImmutableList.of()));
-      }
-      return flatTypes;
-    }
-
-    @Override
-    public TypeDescription record(Types.StructType iStruct, TypeDescription record,
-                                  List<String> names, List<TypeDescription> fields) {
-      return record;
-    }
-
-    @Override
-    public TypeDescription list(Types.ListType iList, TypeDescription array, TypeDescription element) {
-      columnsInContainers.addAll(flatten(element));
-      return array;
-    }
-
-    @Override
-    public TypeDescription map(Types.MapType iMap, TypeDescription map,
-                    TypeDescription key, TypeDescription value) {
-      columnsInContainers.addAll(flatten(key));
-      columnsInContainers.addAll(flatten(value));
-      return map;
-    }
-
+  private static class StatsColumnsVisitor extends OrcSchemaVisitor<Set<Integer>> {
     @Override
-    public TypeDescription primitive(Type.PrimitiveType iPrimitive, TypeDescription primitive) {
-      return primitive;
+    public Set<Integer> record(TypeDescription record, List<String> names, List<Set<Integer>> fields) {
+      ImmutableSet.Builder<Integer> result = ImmutableSet.builder();
+      fields.stream().filter(Objects::nonNull).forEach(result::addAll);
+      record.getChildren().stream().map(ORCSchemaUtil::fieldId).forEach(result::add);

Review comment:
       Just to verify: this would fail if any column does not have an Iceberg ID. Is failing preferred over skipping the metrics for such columns?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org