You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2020/04/23 17:06:25 UTC

[incubator-iceberg] branch master updated: Refactor Parquet visitors to reduce duplication (#950)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new e990796  Refactor Parquet visitors to reduce duplication (#950)
e990796 is described below

commit e9907968a50b89d67a9aa3291602f193f21a257a
Author: openinx <op...@gmail.com>
AuthorDate: Fri Apr 24 01:06:16 2020 +0800

    Refactor Parquet visitors to reduce duplication (#950)
---
 .../data/parquet/GenericParquetReaders.java        | 27 ----------------------
 .../iceberg/data/parquet/GenericParquetWriter.java | 27 ----------------------
 .../iceberg/parquet/ParquetAvroValueReaders.java   | 27 ----------------------
 .../apache/iceberg/parquet/ParquetAvroWriter.java  | 27 ----------------------
 .../apache/iceberg/parquet/ParquetTypeVisitor.java | 10 ++++++++
 .../iceberg/parquet/TypeWithSchemaVisitor.java     | 10 ++++++++
 .../org/apache/iceberg/pig/PigParquetReader.java   | 27 ----------------------
 .../iceberg/spark/data/SparkParquetReaders.java    | 27 ----------------------
 .../iceberg/spark/data/SparkParquetWriters.java    | 26 ---------------------
 9 files changed, 20 insertions(+), 188 deletions(-)

diff --git a/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetReaders.java b/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetReaders.java
index bc6767f..acb35ed 100644
--- a/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetReaders.java
+++ b/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetReaders.java
@@ -30,7 +30,6 @@ import java.time.LocalTime;
 import java.time.OffsetDateTime;
 import java.time.ZoneOffset;
 import java.time.temporal.ChronoUnit;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import org.apache.iceberg.Schema;
@@ -297,32 +296,6 @@ public class GenericParquetReaders {
     MessageType type() {
       return type;
     }
-
-    private String[] currentPath() {
-      String[] path = new String[fieldNames.size()];
-      if (!fieldNames.isEmpty()) {
-        Iterator<String> iter = fieldNames.descendingIterator();
-        for (int i = 0; iter.hasNext(); i += 1) {
-          path[i] = iter.next();
-        }
-      }
-
-      return path;
-    }
-
-    protected String[] path(String name) {
-      String[] path = new String[fieldNames.size() + 1];
-      path[fieldNames.size()] = name;
-
-      if (!fieldNames.isEmpty()) {
-        Iterator<String> iter = fieldNames.descendingIterator();
-        for (int i = 0; iter.hasNext(); i += 1) {
-          path[i] = iter.next();
-        }
-      }
-
-      return path;
-    }
   }
 
   private static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC);
diff --git a/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetWriter.java b/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetWriter.java
index 922a970..d20db91 100644
--- a/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetWriter.java
+++ b/data/src/main/java/org/apache/iceberg/data/parquet/GenericParquetWriter.java
@@ -27,7 +27,6 @@ import java.time.LocalTime;
 import java.time.OffsetDateTime;
 import java.time.ZoneOffset;
 import java.time.temporal.ChronoUnit;
-import java.util.Iterator;
 import java.util.List;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.parquet.ParquetTypeVisitor;
@@ -172,32 +171,6 @@ public class GenericParquetWriter {
           throw new UnsupportedOperationException("Unsupported type: " + primitive);
       }
     }
-
-    private String[] currentPath() {
-      String[] path = new String[fieldNames.size()];
-      if (!fieldNames.isEmpty()) {
-        Iterator<String> iter = fieldNames.descendingIterator();
-        for (int i = 0; iter.hasNext(); i += 1) {
-          path[i] = iter.next();
-        }
-      }
-
-      return path;
-    }
-
-    private String[] path(String name) {
-      String[] path = new String[fieldNames.size() + 1];
-      path[fieldNames.size()] = name;
-
-      if (!fieldNames.isEmpty()) {
-        Iterator<String> iter = fieldNames.descendingIterator();
-        for (int i = 0; iter.hasNext(); i += 1) {
-          path[i] = iter.next();
-        }
-      }
-
-      return path;
-    }
   }
 
   private static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC);
diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroValueReaders.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroValueReaders.java
index a26fe92..6a27274 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroValueReaders.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroValueReaders.java
@@ -26,7 +26,6 @@ import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.UUID;
@@ -231,32 +230,6 @@ public class ParquetAvroValueReaders {
           throw new UnsupportedOperationException("Unsupported type: " + primitive);
       }
     }
-
-    private String[] currentPath() {
-      String[] path = new String[fieldNames.size()];
-      if (!fieldNames.isEmpty()) {
-        Iterator<String> iter = fieldNames.descendingIterator();
-        for (int i = 0; iter.hasNext(); i += 1) {
-          path[i] = iter.next();
-        }
-      }
-
-      return path;
-    }
-
-    private String[] path(String name) {
-      String[] path = new String[fieldNames.size() + 1];
-      path[fieldNames.size()] = name;
-
-      if (!fieldNames.isEmpty()) {
-        Iterator<String> iter = fieldNames.descendingIterator();
-        for (int i = 0; iter.hasNext(); i += 1) {
-          path[i] = iter.next();
-        }
-      }
-
-      return path;
-    }
   }
 
   static class DecimalReader extends ParquetValueReaders.PrimitiveReader<BigDecimal> {
diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroWriter.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroWriter.java
index b76a88d..a900669 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroWriter.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetAvroWriter.java
@@ -20,7 +20,6 @@
 package org.apache.iceberg.parquet;
 
 import com.google.common.collect.Lists;
-import java.util.Iterator;
 import java.util.List;
 import org.apache.avro.generic.GenericData.Fixed;
 import org.apache.avro.generic.IndexedRecord;
@@ -163,32 +162,6 @@ public class ParquetAvroWriter {
           throw new UnsupportedOperationException("Unsupported type: " + primitive);
       }
     }
-
-    private String[] currentPath() {
-      String[] path = new String[fieldNames.size()];
-      if (!fieldNames.isEmpty()) {
-        Iterator<String> iter = fieldNames.descendingIterator();
-        for (int i = 0; iter.hasNext(); i += 1) {
-          path[i] = iter.next();
-        }
-      }
-
-      return path;
-    }
-
-    private String[] path(String name) {
-      String[] path = new String[fieldNames.size() + 1];
-      path[fieldNames.size()] = name;
-
-      if (!fieldNames.isEmpty()) {
-        Iterator<String> iter = fieldNames.descendingIterator();
-        for (int i = 0; iter.hasNext(); i += 1) {
-          path[i] = iter.next();
-        }
-      }
-
-      return path;
-    }
   }
 
   private static class FixedWriter extends PrimitiveWriter<Fixed> {
diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java
index 9d83053..6b68c84 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/ParquetTypeVisitor.java
@@ -160,4 +160,14 @@ public class ParquetTypeVisitor<T> {
   public T primitive(PrimitiveType primitive) {
     return null;
   }
+
+  protected String[] currentPath() {
+    return Lists.newArrayList(fieldNames.descendingIterator()).toArray(new String[0]);
+  }
+
+  protected String[] path(String name) {
+    List<String> list = Lists.newArrayList(fieldNames.descendingIterator());
+    list.add(name);
+    return list.toArray(new String[0]);
+  }
 }
diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java b/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java
index 7b741c3..a13705d 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/TypeWithSchemaVisitor.java
@@ -192,4 +192,14 @@ public class TypeWithSchemaVisitor<T> {
                      PrimitiveType primitive) {
     return null;
   }
+
+  protected String[] currentPath() {
+    return Lists.newArrayList(fieldNames.descendingIterator()).toArray(new String[0]);
+  }
+
+  protected String[] path(String name) {
+    List<String> list = Lists.newArrayList(fieldNames.descendingIterator());
+    list.add(name);
+    return list.toArray(new String[0]);
+  }
 }
diff --git a/pig/src/main/java/org/apache/iceberg/pig/PigParquetReader.java b/pig/src/main/java/org/apache/iceberg/pig/PigParquetReader.java
index aa3f1dc..fd9e9ac 100644
--- a/pig/src/main/java/org/apache/iceberg/pig/PigParquetReader.java
+++ b/pig/src/main/java/org/apache/iceberg/pig/PigParquetReader.java
@@ -26,7 +26,6 @@ import java.time.Instant;
 import java.time.OffsetDateTime;
 import java.time.ZoneOffset;
 import java.time.temporal.ChronoUnit;
-import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -273,32 +272,6 @@ public class PigParquetReader {
           throw new UnsupportedOperationException("Unsupported type: " + primitive);
       }
     }
-
-    private String[] currentPath() {
-      String[] path = new String[fieldNames.size()];
-      if (!fieldNames.isEmpty()) {
-        Iterator<String> iter = fieldNames.descendingIterator();
-        for (int i = 0; iter.hasNext(); i += 1) {
-          path[i] = iter.next();
-        }
-      }
-
-      return path;
-    }
-
-    protected String[] path(String name) {
-      String[] path = new String[fieldNames.size() + 1];
-      path[fieldNames.size()] = name;
-
-      if (!fieldNames.isEmpty()) {
-        Iterator<String> iter = fieldNames.descendingIterator();
-        for (int i = 0; iter.hasNext(); i += 1) {
-          path[i] = iter.next();
-        }
-      }
-
-      return path;
-    }
   }
 
   private static class DateReader extends PrimitiveReader<String> {
diff --git a/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java b/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
index 190b708..60a4138 100644
--- a/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
+++ b/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
@@ -27,7 +27,6 @@ import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import org.apache.iceberg.Schema;
@@ -281,35 +280,9 @@ public class SparkParquetReaders {
       }
     }
 
-    private String[] currentPath() {
-      String[] path = new String[fieldNames.size()];
-      if (!fieldNames.isEmpty()) {
-        Iterator<String> iter = fieldNames.descendingIterator();
-        for (int i = 0; iter.hasNext(); i += 1) {
-          path[i] = iter.next();
-        }
-      }
-
-      return path;
-    }
-
     protected MessageType type() {
       return type;
     }
-
-    protected String[] path(String name) {
-      String[] path = new String[fieldNames.size() + 1];
-      path[fieldNames.size()] = name;
-
-      if (!fieldNames.isEmpty()) {
-        Iterator<String> iter = fieldNames.descendingIterator();
-        for (int i = 0; iter.hasNext(); i += 1) {
-          path[i] = iter.next();
-        }
-      }
-
-      return path;
-    }
   }
 
   private static class BinaryDecimalReader extends PrimitiveReader<Decimal> {
diff --git a/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java b/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java
index 3cd0d5e..52ebd82 100644
--- a/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java
+++ b/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java
@@ -185,32 +185,6 @@ public class SparkParquetWriters {
           throw new UnsupportedOperationException("Unsupported type: " + primitive);
       }
     }
-
-    private String[] currentPath() {
-      String[] path = new String[fieldNames.size()];
-      if (!fieldNames.isEmpty()) {
-        Iterator<String> iter = fieldNames.descendingIterator();
-        for (int i = 0; iter.hasNext(); i += 1) {
-          path[i] = iter.next();
-        }
-      }
-
-      return path;
-    }
-
-    private String[] path(String name) {
-      String[] path = new String[fieldNames.size() + 1];
-      path[fieldNames.size()] = name;
-
-      if (!fieldNames.isEmpty()) {
-        Iterator<String> iter = fieldNames.descendingIterator();
-        for (int i = 0; iter.hasNext(); i += 1) {
-          path[i] = iter.next();
-        }
-      }
-
-      return path;
-    }
   }
 
   private static PrimitiveWriter<UTF8String> utf8Strings(ColumnDescriptor desc) {