Posted to issues@iceberg.apache.org by GitBox <gi...@apache.org> on 2020/07/15 19:11:34 UTC

[GitHub] [iceberg] rdblue commented on a change in pull request #1197: Refactor the GenericOrcWriter by using OrcSchemaWithTypeVisitor#visit

rdblue commented on a change in pull request #1197:
URL: https://github.com/apache/iceberg/pull/1197#discussion_r455281054



##########
File path: data/src/main/java/org/apache/iceberg/data/orc/GenericOrcWriter.java
##########
@@ -434,175 +144,12 @@ public void addValue(int rowId, BigDecimal data, ColumnVector output) {
 
     @Override
     @SuppressWarnings("unchecked")
-    public void addValue(int rowId, Record data, ColumnVector output) {
-      if (data == null) {
-        output.noNulls = false;
-        output.isNull[rowId] = true;
-      } else {
-        output.isNull[rowId] = false;
-        StructColumnVector cv = (StructColumnVector) output;
-        for (int c = 0; c < children.length; ++c) {
-          children[c].addValue(rowId, data.get(c, children[c].getJavaClass()), cv.fields[c]);
-        }
-      }
-    }
-  }
-
-  static class ListConverter implements Converter<List> {
-    private final Converter children;
-
-    ListConverter(TypeDescription schema) {
-      this.children = buildConverter(schema.getChildren().get(0));
-    }
-
-    @Override
-    public Class<List> getJavaClass() {
-      return List.class;
-    }
-
-    @Override
-    @SuppressWarnings("unchecked")
-    public void addValue(int rowId, List data, ColumnVector output) {
-      if (data == null) {
-        output.noNulls = false;
-        output.isNull[rowId] = true;
-      } else {
-        output.isNull[rowId] = false;
-        List<Object> value = (List<Object>) data;
-        ListColumnVector cv = (ListColumnVector) output;
-        // record the length and start of the list elements
-        cv.lengths[rowId] = value.size();
-        cv.offsets[rowId] = cv.childCount;
-        cv.childCount += cv.lengths[rowId];
-        // make sure the child is big enough
-        cv.child.ensureSize(cv.childCount, true);
-        // Add each element
-        for (int e = 0; e < cv.lengths[rowId]; ++e) {
-          children.addValue((int) (e + cv.offsets[rowId]), value.get(e), cv.child);
-        }
+    public void nonNullWrite(int rowId, Record data, ColumnVector output) {
+      StructColumnVector cv = (StructColumnVector) output;
+      for (int c = 0; c < writers.size(); ++c) {
+        OrcValueWriter child = writers.get(c);
+        child.write(rowId, data.get(c, child.getJavaClass()), cv.fields[c]);
       }
     }
   }
-
-  static class MapConverter implements Converter<Map> {
-    private final Converter keyConverter;
-    private final Converter valueConverter;
-
-    MapConverter(TypeDescription schema) {
-      this.keyConverter = buildConverter(schema.getChildren().get(0));
-      this.valueConverter = buildConverter(schema.getChildren().get(1));
-    }
-
-    @Override
-    public Class<Map> getJavaClass() {
-      return Map.class;
-    }
-
-    @Override
-    @SuppressWarnings("unchecked")
-    public void addValue(int rowId, Map data, ColumnVector output) {
-      if (data == null) {
-        output.noNulls = false;
-        output.isNull[rowId] = true;
-      } else {
-        output.isNull[rowId] = false;
-        Map<Object, Object> map = (Map<Object, Object>) data;
-        List<Object> keys = Lists.newArrayListWithExpectedSize(map.size());
-        List<Object> values = Lists.newArrayListWithExpectedSize(map.size());
-        for (Map.Entry<?, ?> entry : map.entrySet()) {
-          keys.add(entry.getKey());
-          values.add(entry.getValue());
-        }
-        MapColumnVector cv = (MapColumnVector) output;
-        // record the length and start of the list elements
-        cv.lengths[rowId] = map.size();
-        cv.offsets[rowId] = cv.childCount;
-        cv.childCount += cv.lengths[rowId];
-        // make sure the child is big enough
-        cv.keys.ensureSize(cv.childCount, true);
-        cv.values.ensureSize(cv.childCount, true);
-        // Add each element
-        for (int e = 0; e < cv.lengths[rowId]; ++e) {
-          int pos = (int) (e + cv.offsets[rowId]);
-          keyConverter.addValue(pos, keys.get(e), cv.keys);
-          valueConverter.addValue(pos, values.get(e), cv.values);
-        }
-      }
-    }
-  }
-
-  private static Converter buildConverter(TypeDescription schema) {
-    switch (schema.getCategory()) {
-      case BOOLEAN:
-        return new BooleanConverter();
-      case BYTE:
-        return new ByteConverter();
-      case SHORT:
-        return new ShortConverter();
-      case DATE:
-        return new DateConverter();
-      case INT:
-        return new IntConverter();
-      case LONG:
-        String longAttributeValue = schema.getAttributeValue(ORCSchemaUtil.ICEBERG_LONG_TYPE_ATTRIBUTE);
-        ORCSchemaUtil.LongType longType = longAttributeValue == null ? ORCSchemaUtil.LongType.LONG :
-            ORCSchemaUtil.LongType.valueOf(longAttributeValue);
-        switch (longType) {
-          case TIME:
-            return new TimeConverter();
-          case LONG:
-            return new LongConverter();
-          default:
-            throw new IllegalStateException("Unhandled Long type found in ORC type attribute: " + longType);
-        }
-      case FLOAT:
-        return new FloatConverter();
-      case DOUBLE:
-        return new DoubleConverter();
-      case BINARY:
-        String binaryAttributeValue = schema.getAttributeValue(ORCSchemaUtil.ICEBERG_BINARY_TYPE_ATTRIBUTE);
-        ORCSchemaUtil.BinaryType binaryType = binaryAttributeValue == null ? ORCSchemaUtil.BinaryType.BINARY :
-            ORCSchemaUtil.BinaryType.valueOf(binaryAttributeValue);
-        switch (binaryType) {
-          case UUID:
-            return new UUIDConverter();
-          case FIXED:
-            return new FixedConverter();
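
The hunk above shows the shape of the refactor: the null check that every removed converter repeated before writing into its ColumnVector is hoisted into the writer base type, so implementations like the struct writer only provide nonNullWrite. A minimal sketch of the contract this implies, inferred from the calls visible in the hunk rather than copied from the Iceberg source (the ColumnVector package below is an assumption):

    import org.apache.orc.storage.ql.exec.vector.ColumnVector;

    // Sketch only: apart from getJavaClass, write, and nonNullWrite, which
    // appear in the diff above, the shape of this interface is inferred.
    interface OrcValueWriter<T> {
      Class<T> getJavaClass();

      // The null check formerly duplicated in each converter lives here once.
      default void write(int rowId, T data, ColumnVector output) {
        if (data == null) {
          output.noNulls = false;
          output.isNull[rowId] = true;
        } else {
          output.isNull[rowId] = false;
          nonNullWrite(rowId, data, output);
        }
      }

      // Implementations handle only the non-null case.
      void nonNullWrite(int rowId, T data, ColumnVector output);
    }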

Review comment:
       > I'd prefer to pass the LocalDateTime object to the comparator and do the LocalDateTime to Long conversion when comparing
   
   Iceberg's internal representation deliberately does not use higher-level types like LocalDateTime, for a few good reasons:
   1. It is simpler to work with ordinal values (see the sketch below)
   2. The interpretation of an ordinal value is delegated to the object model: Iceberg stays agnostic to calendars, time zones, and the other concerns that are built into the processing engines
   3. The guarantee is simpler: whatever data values are passed into Iceberg are passed back out, unmodified
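
   To make point 1 concrete, here is a minimal sketch of an engine-side mapping between LocalDateTime and an ordinal long (microseconds since the epoch, which is how Iceberg represents timestamp values). The UTC interpretation below is the engine's choice, not Iceberg's, and the class is purely illustrative:

      import java.time.LocalDateTime;
      import java.time.ZoneOffset;
      import java.time.temporal.ChronoUnit;

      public class OrdinalTimestampExample {
        private static final LocalDateTime EPOCH =
            LocalDateTime.ofEpochSecond(0, 0, ZoneOffset.UTC);

        // Engine-side encoding: Iceberg only ever sees and returns the long.
        static long toMicros(LocalDateTime ts) {
          return ChronoUnit.MICROS.between(EPOCH, ts);
        }

        static LocalDateTime fromMicros(long micros) {
          return EPOCH.plus(micros, ChronoUnit.MICROS);
        }

        public static void main(String[] args) {
          LocalDateTime ts = LocalDateTime.of(2020, 7, 15, 19, 11, 34);
          long ordinal = toMicros(ts);

          // Point 1: comparison is a plain long comparison, no calendar logic.
          System.out.println(ordinal < toMicros(ts.plusSeconds(1))); // true

          // Point 3: the value passed in comes back out, unmodified.
          System.out.println(fromMicros(ordinal).equals(ts)); // true
        }
      }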



