You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by zi...@apache.org on 2018/01/22 16:22:26 UTC
parquet-mr git commit: PARQUET-1170: Logical-type-based toString for
proper representation in tools/logs
Repository: parquet-mr
Updated Branches:
refs/heads/master 878ebcd0b -> 89aeec028
PARQUET-1170: Logical-type-based toString for proper representation in tools/logs
Author: Gabor Szadovszky <ga...@cloudera.com>
Closes #448 from gszadovszky/PARQUET-1170 and squashes the following commits:
8f1f8cc [Gabor Szadovszky] PARQUET-1170: Make interval test more readable
90f73b5 [Gabor Szadovszky] PARQUET-1170: Fix endianness of interval
612d70b [Gabor Szadovszky] PARQUET-1170: Add unit test for different locale
d8c5204 [Gabor Szadovszky] PARQUET-1170: Implement toString based on logical type so values will be represented properly in tools/logs etc.
Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/89aeec02
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/89aeec02
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/89aeec02
Branch: refs/heads/master
Commit: 89aeec028b6f56be96b9c56c2fdbb931f80853ad
Parents: 878ebcd
Author: Gabor Szadovszky <ga...@cloudera.com>
Authored: Mon Jan 22 17:21:27 2018 +0100
Committer: Zoltan Ivanfi <zi...@cloudera.com>
Committed: Mon Jan 22 17:21:27 2018 +0100
----------------------------------------------------------------------
.../main/java/org/apache/parquet/cli/Util.java | 40 +--
.../column/statistics/BinaryStatistics.java | 5 +-
.../column/statistics/BooleanStatistics.java | 5 +
.../column/statistics/DoubleStatistics.java | 4 +-
.../column/statistics/FloatStatistics.java | 4 +-
.../column/statistics/IntStatistics.java | 5 +-
.../column/statistics/LongStatistics.java | 5 +-
.../parquet/column/statistics/Statistics.java | 13 +-
.../org/apache/parquet/schema/OriginalType.java | 60 +++-
.../parquet/schema/PrimitiveStringifier.java | 360 +++++++++++++++++++
.../apache/parquet/schema/PrimitiveType.java | 10 +-
.../column/statistics/TestStatistics.java | 46 ++-
.../schema/TestPrimitiveStringifier.java | 298 +++++++++++++++
.../parquet/tools/command/DumpCommand.java | 34 +-
14 files changed, 791 insertions(+), 98 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java
----------------------------------------------------------------------
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java b/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java
index 04b3901..98bc1e5 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java
@@ -27,15 +27,12 @@ import org.apache.commons.codec.binary.Hex;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.EncodingStats;
-import org.apache.parquet.column.statistics.BinaryStatistics;
-import org.apache.parquet.column.statistics.BooleanStatistics;
import org.apache.parquet.column.statistics.Statistics;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
-import java.nio.charset.StandardCharsets;
import java.util.Set;
import static org.apache.parquet.column.Encoding.BIT_PACKED;
@@ -90,46 +87,15 @@ public class Util {
if (!stats.hasNonNullValue()) {
return "";
}
- // TODO: use original types when showing decimal, timestamp, etc.
- if (stats instanceof BinaryStatistics) {
- byte[] minBytes = stats.getMinBytes();
- byte[] maxBytes = stats.getMaxBytes();
- return String.format("%s / %s",
- printable(minBytes, annotation == OriginalType.UTF8, 30),
- printable(maxBytes, annotation == OriginalType.UTF8, 30));
- } else {
- return String.format("%s / %s", stats.minAsString(), stats.maxAsString());
- }
+ return String.format("%s / %s", humanReadable(stats.minAsString(), 30), humanReadable(stats.maxAsString(), 30));
}
public static String toString(Statistics stats, long count, OriginalType annotation) {
if (stats == null) {
return "no stats";
}
- // TODO: use original types when showing decimal, timestamp, etc.
- if (stats instanceof BooleanStatistics) {
- return String.format("nulls: %d/%d", stats.getNumNulls(), count);
- } else if (stats instanceof BinaryStatistics) {
- byte[] minBytes = stats.getMinBytes();
- byte[] maxBytes = stats.getMaxBytes();
- return String.format("min: %s max: %s nulls: %d/%d",
- printable(minBytes, annotation == OriginalType.UTF8, 30),
- printable(maxBytes, annotation == OriginalType.UTF8, 30),
- stats.getNumNulls(), count);
- } else {
- return String.format("min: %s max: %s nulls: %d/%d",
- stats.minAsString(), stats.maxAsString(), stats.getNumNulls(), count);
- }
- }
-
- private static String printable(byte[] bytes, boolean isUtf8, int len) {
- if (bytes == null) {
- return "null";
- } else if (isUtf8) {
- return humanReadable(new String(bytes, StandardCharsets.UTF_8), len);
- } else {
- return humanReadable(bytes, len);
- }
+ return String.format("min: %s max: %s nulls: %d/%d",
+ humanReadable(stats.minAsString(), 30), humanReadable(stats.maxAsString(), 30), stats.getNumNulls(), count);
}
public static String humanReadable(String str, int len) {
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java
index a68285b..8ffb585 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java
@@ -94,9 +94,8 @@ public class BinaryStatistics extends Statistics<Binary> {
}
@Override
- String toString(Binary value) {
- // TODO: have separate toString for different logical types?
- return value == null ? "null" : value.toStringUsingUTF8();
+ String stringify(Binary value) {
+ return stringifier.stringify(value);
}
@Override
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/column/statistics/BooleanStatistics.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/BooleanStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/BooleanStatistics.java
index 0e77b61..917fb5a 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/BooleanStatistics.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/BooleanStatistics.java
@@ -88,6 +88,11 @@ public class BooleanStatistics extends Statistics<Boolean> {
}
@Override
+ String stringify(Boolean value) {
+ return stringifier.stringify(value);
+ }
+
+ @Override
public boolean isSmallerThan(long size) {
return !hasNonNullValue() || (2 < size);
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/column/statistics/DoubleStatistics.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/DoubleStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/DoubleStatistics.java
index 0dd067b..fb58263 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/DoubleStatistics.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/DoubleStatistics.java
@@ -88,8 +88,8 @@ public class DoubleStatistics extends Statistics<Double> {
}
@Override
- String toString(Double value) {
- return String.format("%.5f", value);
+ String stringify(Double value) {
+ return stringifier.stringify(value);
}
@Override
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/column/statistics/FloatStatistics.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/FloatStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/FloatStatistics.java
index 36836c6..c731dcf 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/FloatStatistics.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/FloatStatistics.java
@@ -89,8 +89,8 @@ public class FloatStatistics extends Statistics<Float> {
}
@Override
- String toString(Float value) {
- return String.format("%.5f", value);
+ String stringify(Float value) {
+ return stringifier.stringify(value);
}
@Override
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/column/statistics/IntStatistics.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/IntStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/IntStatistics.java
index 5df7f0a..ef68f69 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/IntStatistics.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/IntStatistics.java
@@ -88,9 +88,8 @@ public class IntStatistics extends Statistics<Integer> {
}
@Override
- String toString(Integer value) {
- // TODO: implement unsigned int as required
- return value.toString();
+ String stringify(Integer value) {
+ return stringifier.stringify(value);
}
@Override
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/column/statistics/LongStatistics.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/LongStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/LongStatistics.java
index fd6d19c..d112afb 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/LongStatistics.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/LongStatistics.java
@@ -88,9 +88,8 @@ public class LongStatistics extends Statistics<Long> {
}
@Override
- String toString(Long value) {
- // TODO: implement unsigned int as required
- return value.toString();
+ String stringify(Long value) {
+ return stringifier.stringify(value);
}
@Override
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java
index 6eb2381..00d0bbf 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java
@@ -19,11 +19,10 @@
package org.apache.parquet.column.statistics;
import java.util.Arrays;
-import java.util.Objects;
-
import org.apache.parquet.column.UnknownColumnTypeException;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.schema.PrimitiveComparator;
+import org.apache.parquet.schema.PrimitiveStringifier;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
@@ -40,10 +39,12 @@ public abstract class Statistics<T extends Comparable<T>> {
private final PrimitiveComparator<T> comparator;
private boolean hasNonNullValue;
private long num_nulls;
+ final PrimitiveStringifier stringifier;
Statistics(PrimitiveType type) {
this.type = type;
this.comparator = type.comparator();
+ this.stringifier = type.stringifier();
hasNonNullValue = false;
num_nulls = 0;
}
@@ -287,19 +288,17 @@ public abstract class Statistics<T extends Comparable<T>> {
* Returns the string representation of min for debugging/logging purposes.
*/
public String minAsString() {
- return toString(genericGetMin());
+ return stringify(genericGetMin());
}
/**
* Returns the string representation of max for debugging/logging purposes.
*/
public String maxAsString() {
- return toString(genericGetMax());
+ return stringify(genericGetMax());
}
- String toString(T value) {
- return Objects.toString(value);
- }
+ abstract String stringify(T value);
/**
* Abstract method to return whether the min and max values fit in the given
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java
index 77acc54..b00ae7e 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java
@@ -21,24 +21,46 @@ package org.apache.parquet.schema;
public enum OriginalType {
MAP,
LIST,
- UTF8,
+ UTF8(PrimitiveStringifier.UTF8_STRINGIFIER),
MAP_KEY_VALUE,
- ENUM,
- DECIMAL,
- DATE,
- TIME_MILLIS,
- TIME_MICROS,
- TIMESTAMP_MILLIS,
- TIMESTAMP_MICROS,
- UINT_8,
- UINT_16,
- UINT_32,
- UINT_64,
- INT_8,
- INT_16,
- INT_32,
- INT_64,
- JSON,
- BSON,
- INTERVAL;
+ ENUM(PrimitiveStringifier.UTF8_STRINGIFIER),
+ DECIMAL {
+ @Override
+ PrimitiveStringifier stringifier(PrimitiveType type) {
+ return PrimitiveStringifier.createDecimalStringifier(type.getDecimalMetadata().getScale());
+ }
+ },
+ DATE(PrimitiveStringifier.DATE_STRINGIFIER),
+ TIME_MILLIS(PrimitiveStringifier.TIME_STRINGIFIER),
+ TIME_MICROS(PrimitiveStringifier.TIME_STRINGIFIER),
+ TIMESTAMP_MILLIS(PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER),
+ TIMESTAMP_MICROS(PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER),
+ UINT_8(PrimitiveStringifier.UNSIGNED_STRINGIFIER),
+ UINT_16(PrimitiveStringifier.UNSIGNED_STRINGIFIER),
+ UINT_32(PrimitiveStringifier.UNSIGNED_STRINGIFIER),
+ UINT_64(PrimitiveStringifier.UNSIGNED_STRINGIFIER),
+ INT_8(PrimitiveStringifier.DEFAULT_STRINGIFIER),
+ INT_16(PrimitiveStringifier.DEFAULT_STRINGIFIER),
+ INT_32(PrimitiveStringifier.DEFAULT_STRINGIFIER),
+ INT_64(PrimitiveStringifier.DEFAULT_STRINGIFIER),
+ JSON(PrimitiveStringifier.UTF8_STRINGIFIER),
+ BSON(PrimitiveStringifier.DEFAULT_STRINGIFIER),
+ INTERVAL(PrimitiveStringifier.INTERVAL_STRINGIFIER);
+
+ private final PrimitiveStringifier stringifier;
+
+ PrimitiveStringifier stringifier(PrimitiveType type) {
+ if (stringifier == null) {
+ throw new UnsupportedOperationException("Stringifier is not supported for the original type: " + this);
+ }
+ return stringifier;
+ }
+
+ OriginalType() {
+ this(null);
+ }
+
+ OriginalType(PrimitiveStringifier stringifier) {
+ this.stringifier = stringifier;
+ }
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
new file mode 100644
index 0000000..c1a9b58
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
@@ -0,0 +1,360 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.schema;
+
+import static java.util.concurrent.TimeUnit.HOURS;
+import static java.util.concurrent.TimeUnit.MICROSECONDS;
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
+import static java.util.concurrent.TimeUnit.MINUTES;
+import static java.util.concurrent.TimeUnit.SECONDS;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.text.SimpleDateFormat;
+import java.util.TimeZone;
+import java.util.concurrent.TimeUnit;
+
+import javax.naming.OperationNotSupportedException;
+
+import org.apache.parquet.io.api.Binary;
+
+/**
+ * Class that provides string representations for the primitive values. These string values are to be used for
+ * logging/debugging purposes. The method {@code stringify} is overloaded for each primitive type. The overloaded
+ * methods not implemented for the related types throw {@link UnsupportedOperationException}.
+ */
+public abstract class PrimitiveStringifier {
+ private final String name;
+
+ private PrimitiveStringifier(String name) {
+ this.name = name;
+ }
+
+ @Override
+ public final String toString() {
+ return name;
+ }
+
+ /**
+ * @param value
+ * the value to be stringified
+ * @return the string representation for {@code value}
+ * @throws UnsupportedOperationException
+ * if value type is not supported by this stringifier
+ */
+ public String stringify(boolean value) {
+ throw new UnsupportedOperationException(
+ "stringify(boolean) was called on a non-boolean stringifier: " + toString());
+ }
+
+ /**
+ * @param value
+ * the value to be stringified
+ * @return the string representation for {@code value}
+ * @throws UnsupportedOperationException
+ * if value type is not supported by this stringifier
+ */
+ public String stringify(int value) {
+ throw new UnsupportedOperationException("stringify(int) was called on a non-int stringifier: " + toString());
+ }
+
+ /**
+ * @param value
+ * the value to be stringified
+ * @return the string representation for {@code value}
+ * @throws UnsupportedOperationException
+ * if value type is not supported by this stringifier
+ */
+ public String stringify(long value) {
+ throw new UnsupportedOperationException("stringify(long) was called on a non-long stringifier: " + toString());
+ }
+
+ /**
+ * @param value
+ * the value to be stringified
+ * @return the string representation for {@code value}
+ * @throws UnsupportedOperationException
+ * if value type is not supported by this stringifier
+ */
+ public String stringify(float value) {
+ throw new UnsupportedOperationException(
+ "stringify(float) was called on a non-float stringifier: " + toString());
+ }
+
+ /**
+ * @param value
+ * the value to be stringified
+ * @return the string representation for {@code value}
+ * @throws UnsupportedOperationException
+ * if value type is not supported by this stringifier
+ */
+ public String stringify(double value) {
+ throw new UnsupportedOperationException(
+ "stringify(double) was called on a non-double stringifier: " + toString());
+ }
+
+ /**
+ * @param value
+ * the value to be stringified
+ * @return the string representation for {@code value}
+ * @throws UnsupportedOperationException
+ * if value type is not supported by this stringifier
+ */
+ public String stringify(Binary value) {
+ throw new UnsupportedOperationException(
+ "stringify(Binary) was called on a non-Binary stringifier: " + toString());
+ }
+
+ private static final String BINARY_NULL = "null";
+ private static final String BINARY_HEXA_PREFIX = "0x";
+ private static final String BINARY_INVALID = "<INVALID>";
+
+ static abstract class BinaryStringifierBase extends PrimitiveStringifier {
+ private BinaryStringifierBase(String name) {
+ super(name);
+ }
+
+ @Override
+ public final String stringify(Binary value) {
+ return value == null ? BINARY_NULL : stringifyNotNull(value);
+ }
+
+ abstract String stringifyNotNull(Binary value);
+ }
+
+ static final PrimitiveStringifier DEFAULT_STRINGIFIER = new BinaryStringifierBase("DEFAULT_STRINGIFIER") {
+ private final char[] digits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+
+ @Override
+ public String stringify(boolean value) {
+ return Boolean.toString(value);
+ }
+
+ @Override
+ public String stringify(int value) {
+ return Integer.toString(value);
+ }
+
+ @Override
+ public String stringify(long value) {
+ return Long.toString(value);
+ }
+
+ @Override
+ public String stringify(float value) {
+ return Float.toString(value);
+ }
+
+ @Override
+ public String stringify(double value) {
+ return Double.toString(value);
+ }
+
+ @Override
+ String stringifyNotNull(Binary value) {
+ ByteBuffer buffer = value.toByteBuffer();
+ StringBuilder builder = new StringBuilder(2 + buffer.remaining() * 2);
+ builder.append(BINARY_HEXA_PREFIX);
+ for (int i = buffer.position(), n = buffer.limit(); i < n; ++i) {
+ byte b = buffer.get(i);
+ builder.append(digits[(b >>> 4) & 0x0F]);
+ builder.append(digits[b & 0x0F]);
+ }
+ return builder.toString();
+ }
+ };
+
+ static final PrimitiveStringifier UNSIGNED_STRINGIFIER = new PrimitiveStringifier("UNSIGNED_STRINGIFIER") {
+ private static final long INT_MASK = 0x00000000FFFFFFFFl;
+
+ // Implemented based on com.google.common.primitives.UnsignedInts.toString(int, int)
+ @Override
+ public String stringify(int value) {
+ return Long.toString(value & INT_MASK);
+ }
+
+ // Implemented based on com.google.common.primitives.UnsignedLongs.toString(long, int)
+ @Override
+ public String stringify(long value) {
+ if (value == 0) {
+ // Simply return "0"
+ return "0";
+ } else if (value > 0) {
+ return Long.toString(value);
+ } else {
+ char[] buf = new char[64];
+ int i = buf.length;
+ // Split x into high-order and low-order halves.
+ // Individual digits are generated from the bottom half into which
+ // bits are moved continuously from the top half.
+ long top = value >>> 32;
+ long bot = (value & INT_MASK) + ((top % 10) << 32);
+ top /= 10;
+ while ((bot > 0) || (top > 0)) {
+ buf[--i] = Character.forDigit((int) (bot % 10), 10);
+ bot = (bot / 10) + ((top % 10) << 32);
+ top /= 10;
+ }
+ // Generate string
+ return new String(buf, i, buf.length - i);
+ }
+ }
+ };
+
+ static final PrimitiveStringifier UTF8_STRINGIFIER = new BinaryStringifierBase("UTF8_STRINGIFIER") {
+ @Override
+ String stringifyNotNull(Binary value) {
+ return value.toStringUsingUTF8();
+ }
+ };
+
+ static final PrimitiveStringifier INTERVAL_STRINGIFIER = new BinaryStringifierBase("INTERVAL_STRINGIFIER") {
+ @Override
+ String stringifyNotNull(Binary value) {
+ if (value.length() != 12) {
+ return BINARY_INVALID;
+ }
+ ByteBuffer buffer = value.toByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
+ int pos = buffer.position();
+ String months = UNSIGNED_STRINGIFIER.stringify(buffer.getInt(pos));
+ String days = UNSIGNED_STRINGIFIER.stringify(buffer.getInt(pos + 4));
+ String millis = UNSIGNED_STRINGIFIER.stringify(buffer.getInt(pos + 8));
+ return "interval(" + months + " months, " + days + " days, " + millis + " millis)";
+ }
+ };
+
+ private static class DateStringifier extends PrimitiveStringifier {
+ private final SimpleDateFormat formatter;
+ private static final TimeZone UTC = TimeZone.getTimeZone("utc");
+
+ private DateStringifier(String name, String format) {
+ super(name);
+ formatter = new SimpleDateFormat(format);
+ formatter.setTimeZone(UTC);
+ }
+
+ @Override
+ public String stringify(int value) {
+ return toFormattedString(toMillis(value));
+ }
+
+ @Override
+ public String stringify(long value) {
+ return toFormattedString(toMillis(value));
+ }
+
+ private String toFormattedString(long millis) {
+ return formatter.format(millis);
+ }
+
+ long toMillis(int value) {
+ // throw the related unsupported exception
+ super.stringify(value);
+ return 0;
+ }
+
+ long toMillis(long value) {
+ // throw the related unsupported exception
+ super.stringify(value);
+ return 0;
+ }
+ }
+
+ static final PrimitiveStringifier DATE_STRINGIFIER = new DateStringifier("DATE_STRINGIFIER", "yyyy-MM-dd") {
+ @Override
+ long toMillis(int value) {
+ return TimeUnit.DAYS.toMillis(value);
+ };
+ };
+
+ static final PrimitiveStringifier TIMESTAMP_MILLIS_STRINGIFIER = new DateStringifier(
+ "TIMESTAMP_MILLIS_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSS") {
+ @Override
+ long toMillis(long value) {
+ return value;
+ }
+ };
+
+ static final PrimitiveStringifier TIMESTAMP_MICROS_STRINGIFIER = new DateStringifier(
+ "TIMESTAMP_MICROS_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSS") {
+ @Override
+ public String stringify(long value) {
+ return super.stringify(value) + String.format("%03d", Math.abs(value % 1000));
+ }
+
+ @Override
+ long toMillis(long value) {
+ return value / 1000;
+ }
+ };
+
+ static final PrimitiveStringifier TIME_STRINGIFIER = new PrimitiveStringifier("TIME_STRINGIFIER") {
+ @Override
+ public String stringify(int millis) {
+ return toTimeString(millis, MILLISECONDS);
+ }
+
+ @Override
+ public String stringify(long micros) {
+ return toTimeString(micros, MICROSECONDS);
+ }
+
+ private String toTimeString(long duration, TimeUnit unit) {
+ String format = "%02d:%02d:%02d.%0" + (unit == MILLISECONDS ? "3d" : "6d");
+ return String.format(format,
+ unit.toHours(duration),
+ convert(duration, unit, MINUTES, HOURS),
+ convert(duration, unit, SECONDS, MINUTES),
+ convert(duration, unit, unit, SECONDS));
+ }
+
+ private long convert(long duration, TimeUnit from, TimeUnit to, TimeUnit higher) {
+ return Math.abs(to.convert(duration, from) % to.convert(1, higher));
+ }
+ };
+
+ static PrimitiveStringifier createDecimalStringifier(final int scale) {
+ return new BinaryStringifierBase("DECIMAL_STRINGIFIER(scale: " + scale + ")") {
+ @Override
+ public String stringify(int value) {
+ return stringifyWithScale(BigInteger.valueOf(value));
+ }
+
+ @Override
+ public String stringify(long value) {
+ return stringifyWithScale(BigInteger.valueOf(value));
+ }
+
+ @Override
+ String stringifyNotNull(Binary value) {
+ try {
+ return stringifyWithScale(new BigInteger(value.getBytesUnsafe()));
+ } catch (NumberFormatException e) {
+ return BINARY_INVALID;
+ }
+ }
+
+ private String stringifyWithScale(BigInteger i) {
+ return new BigDecimal(i, scale).toString();
+ }
+ };
+ }
+}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
index 2d7491f..a421173 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
@@ -377,7 +377,6 @@ public final class PrimitiveType extends Type {
abstract public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E;
abstract PrimitiveComparator<?> comparator(OriginalType logicalType);
-
}
private final PrimitiveTypeName primitive;
@@ -732,4 +731,13 @@ public final class PrimitiveType extends Type {
public ColumnOrder columnOrder() {
return columnOrder;
}
+
+ /**
+ * @return the {@link Type} specific stringifier for generating the proper string representation of the values.
+ */
+ @SuppressWarnings("unchecked")
+ public PrimitiveStringifier stringifier() {
+ OriginalType originalType = getOriginalType();
+ return originalType == null ? PrimitiveStringifier.DEFAULT_STRINGIFIER : originalType.stringifier(this);
+ }
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java b/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java
index 476fbb3..8ca1ca6 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java
@@ -21,10 +21,15 @@ package org.apache.parquet.column.statistics;
import static org.junit.Assert.*;
import java.nio.ByteBuffer;
+import java.util.Locale;
import org.junit.Test;
import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Types;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
public class TestStatistics {
private int[] integerArray;
@@ -253,7 +258,7 @@ public class TestStatistics {
assertEquals(statsFromBytesMinMax.getMin(), Float.MIN_VALUE, 1e-10);
// Test print formatting
- assertEquals(stats.toString(), String.format("min: %.5f, max: %.5f, num_nulls: %d", 0.00010, 553.59998, 0));
+ assertEquals("min: 1.0E-4, max: 553.6, num_nulls: 0", stats.toString());
}
@Test
@@ -321,7 +326,25 @@ public class TestStatistics {
assertEquals(statsFromBytesMinMax.getMin(), Double.MIN_VALUE, 1e-10);
// Test print formatting
- assertEquals(stats.toString(), String.format("min: %.5f, max: %.5f, num_nulls: %d", 0.00001, 944.50000, 0));
+ assertEquals("min: 1.0E-5, max: 944.5, num_nulls: 0", stats.toString());
+ }
+
+ @Test
+ public void testFloatingPointStringIndependentFromLocale() {
+ Statistics<?> floatStats = Statistics.createStats(Types.optional(PrimitiveTypeName.FLOAT).named("test-float"));
+ floatStats.updateStats(123.456f);
+ Statistics<?> doubleStats = Statistics.createStats(Types.optional(PrimitiveTypeName.DOUBLE).named("test-double"));
+ doubleStats.updateStats(12345.6789);
+
+ Locale defaultLocale = Locale.getDefault();
+ try {
+ // Set the locale to French where the decimal separator would be ',' instead of '.'
+ Locale.setDefault(Locale.FRENCH);
+ assertEquals("min: 123.456, max: 123.456, num_nulls: 0", floatStats.toString());
+ assertEquals("min: 12345.6789, max: 12345.6789, num_nulls: 0", doubleStats.toString());
+ } finally {
+ Locale.setDefault(defaultLocale);
+ }
}
@Test
@@ -376,23 +399,24 @@ public class TestStatistics {
public void testBinaryMinMax() {
//Test basic max/min
stringArray = new String[] {"hello", "world", "this", "is", "a", "test", "of", "the", "stats", "class"};
- BinaryStatistics stats = new BinaryStatistics();
+ PrimitiveType type = Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("test_binary_utf8");
+ BinaryStatistics stats = (BinaryStatistics) Statistics.createStats(type);
for (String s: stringArray) {
stats.updateStats(Binary.fromString(s));
}
- assertEquals(stats.getMax(), Binary.fromString("world"));
- assertEquals(stats.getMin(), Binary.fromString("a"));
+ assertEquals(stats.genericGetMax(), Binary.fromString("world"));
+ assertEquals(stats.genericGetMin(), Binary.fromString("a"));
// Test empty string
stringArray = new String[] {"", "", "", "", ""};
- BinaryStatistics statsEmpty = new BinaryStatistics();
+ BinaryStatistics statsEmpty = (BinaryStatistics) Statistics.createStats(type);
for (String s: stringArray) {
statsEmpty.updateStats(Binary.fromString(s));
}
- assertEquals(statsEmpty.getMax(), Binary.fromString(""));
- assertEquals(statsEmpty.getMin(), Binary.fromString(""));
+ assertEquals(statsEmpty.genericGetMax(), Binary.fromString(""));
+ assertEquals(statsEmpty.genericGetMin(), Binary.fromString(""));
// Test converting to and from byte[]
byte[] stringMaxBytes = stats.getMaxBytes();
@@ -401,11 +425,11 @@ public class TestStatistics {
assertEquals(new String(stringMaxBytes), "world");
assertEquals(new String(stringMinBytes), "a");
- BinaryStatistics statsFromBytes = new BinaryStatistics();
+ BinaryStatistics statsFromBytes = (BinaryStatistics) Statistics.createStats(type);
statsFromBytes.setMinMaxFromBytes(stringMinBytes, stringMaxBytes);
- assertEquals(statsFromBytes.getMax(), Binary.fromString("world"));
- assertEquals(statsFromBytes.getMin(), Binary.fromString("a"));
+ assertEquals(statsFromBytes.genericGetMax(), Binary.fromString("world"));
+ assertEquals(statsFromBytes.genericGetMin(), Binary.fromString("a"));
// Test print formatting
assertEquals(stats.toString(), "min: a, max: world, num_nulls: 0");
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java
new file mode 100644
index 0000000..53045cf
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java
@@ -0,0 +1,298 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.schema;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static java.util.concurrent.TimeUnit.HOURS;
+import static java.util.concurrent.TimeUnit.MICROSECONDS;
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
+import static java.util.concurrent.TimeUnit.MINUTES;
+import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.apache.parquet.schema.PrimitiveStringifier.DATE_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.DEFAULT_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.INTERVAL_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIME_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.UNSIGNED_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.UTF8_STRINGIFIER;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.TimeZone;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.parquet.io.api.Binary;
+import org.junit.Test;
+
+public class TestPrimitiveStringifier {
+
+ private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
+
+ @Test
+ public void testDefaultStringifier() { // pins the expected text DEFAULT_STRINGIFIER produces for each value kind
+ PrimitiveStringifier stringifier = DEFAULT_STRINGIFIER;
+
+ assertEquals("true", stringifier.stringify(true)); // boolean values
+ assertEquals("false", stringifier.stringify(false));
+
+ assertEquals("0.0", stringifier.stringify(0.0)); // double values
+ assertEquals("123456.7891234567", stringifier.stringify(123456.7891234567));
+ assertEquals("-98765.43219876543", stringifier.stringify(-98765.43219876543));
+
+ assertEquals("0.0", stringifier.stringify(0.0f)); // float values
+ assertEquals("987.6543", stringifier.stringify(987.6543f));
+ assertEquals("-123.4567", stringifier.stringify(-123.4567f));
+
+ assertEquals("0", stringifier.stringify(0)); // int values
+ assertEquals("1234567890", stringifier.stringify(1234567890));
+ assertEquals("-987654321", stringifier.stringify(-987654321));
+
+ assertEquals("0", stringifier.stringify(0l)); // long values
+ assertEquals("1234567890123456789", stringifier.stringify(1234567890123456789l));
+ assertEquals("-987654321987654321", stringifier.stringify(-987654321987654321l));
+
+ assertEquals("null", stringifier.stringify(null)); // a null argument is expected to yield the text "null"
+ assertEquals("0x", stringifier.stringify(Binary.EMPTY)); // binaries are expected as "0x" followed by upper-case hex digits
+ assertEquals("0x0123456789ABCDEF", stringifier.stringify(Binary.fromConstantByteArray(
+ new byte[] { 0x01, 0x23, 0x45, 0x67, (byte) 0x89, (byte) 0xAB, (byte) 0xCD, (byte) 0xEF })));
+ }
+
+ @Test
+ public void testUnsignedStringifier() { // int and long inputs are expected to be printed as unsigned numbers
+ PrimitiveStringifier stringifier = UNSIGNED_STRINGIFIER;
+
+ assertEquals("0", stringifier.stringify(0));
+ assertEquals("2147483647", stringifier.stringify(2147483647));
+ assertEquals("4294967295", stringifier.stringify(0xFFFFFFFF)); // -1 as a signed int; expected as 2^32 - 1
+
+ assertEquals("0", stringifier.stringify(0l));
+ assertEquals("9223372036854775807", stringifier.stringify(9223372036854775807l));
+ assertEquals("18446744073709551615", stringifier.stringify(0xFFFFFFFFFFFFFFFFl)); // -1 as a signed long; expected as 2^64 - 1
+
+ checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE); // every kind other than int/long must throw
+ }
+
+ @Test
+ public void testUTF8Stringifier() { // Binary inputs are expected to be printed as the text they encode in UTF-8
+ PrimitiveStringifier stringifier = UTF8_STRINGIFIER;
+
+ assertEquals("null", stringifier.stringify(null)); // a null argument is expected to yield the text "null"
+ assertEquals("", stringifier.stringify(Binary.EMPTY)); // an empty binary is expected to yield an empty string
+ assertEquals("This is a UTF-8 test", stringifier.stringify(Binary.fromString("This is a UTF-8 test")));
+ assertEquals("これはUTF-8のテストです", // non-ASCII text, encoded explicitly with UTF_8 below
+ stringifier.stringify(Binary.fromConstantByteArray("これはUTF-8のテストです".getBytes(UTF_8))));
+
+ checkThrowingUnsupportedException(stringifier, Binary.class); // every kind other than Binary must throw
+ }
+
+ @Test
+ public void testIntervalStringifier() { // 12-byte binaries are expected as "interval(M months, D days, N millis)"
+ PrimitiveStringifier stringifier = INTERVAL_STRINGIFIER;
+
+ assertEquals("null", stringifier.stringify(null));
+
+ assertEquals("<INVALID>", stringifier.stringify(Binary.EMPTY)); // lengths 0, 11 and 13 bytes are all expected as <INVALID>
+ assertEquals("<INVALID>",
+ stringifier.stringify(Binary.fromConstantByteArray(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 })));
+ assertEquals("<INVALID>",
+ stringifier.stringify(Binary.fromReusedByteArray(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 })));
+
+ ByteBuffer buffer = ByteBuffer.allocate(12); // freshly allocated buffer is zero-filled: three zero fields
+ assertEquals("interval(0 months, 0 days, 0 millis)",
+ stringifier.stringify(Binary.fromConstantByteBuffer(buffer)));
+
+ buffer.putInt(0x03000000); // ByteBuffer writes big-endian by default, so the bytes are 03 00 00 00
+ buffer.putInt(0x06000000); // bytes 06 00 00 00
+ buffer.putInt(0x09000000); // bytes 09 00 00 00
+ buffer.flip();
+ assertEquals("interval(3 months, 6 days, 9 millis)", // i.e. each 4-byte field is expected to be read little-endian
+ stringifier.stringify(Binary.fromConstantByteBuffer(buffer)));
+
+ buffer.clear();
+ buffer.putInt(0xFFFFFFFF); // bytes FF FF FF FF
+ buffer.putInt(0xFEFFFFFF); // bytes FE FF FF FF, little-endian 0xFFFFFFFE
+ buffer.putInt(0xFDFFFFFF); // bytes FD FF FF FF, little-endian 0xFFFFFFFD
+ buffer.flip();
+ assertEquals("interval(4294967295 months, 4294967294 days, 4294967293 millis)", // fields are expected as unsigned values
+ stringifier.stringify(Binary.fromReusedByteBuffer(buffer)));
+
+ checkThrowingUnsupportedException(stringifier, Binary.class); // every kind other than Binary must throw
+ }
+
+ @Test
+ public void testDateStringifier() { // int inputs are day counts; expected output is a yyyy-MM-dd style date
+ PrimitiveStringifier stringifier = DATE_STRINGIFIER;
+
+ assertEquals("1970-01-01", stringifier.stringify(0)); // day 0 is expected as 1970-01-01
+
+ Calendar cal = Calendar.getInstance(UTC); // UTC calendar, independent of the JVM default time zone
+ cal.clear(); // drop all fields so only the date set below is defined
+ cal.set(2017, Calendar.DECEMBER, 14);
+ assertEquals("2017-12-14", stringifier.stringify((int) MILLISECONDS.toDays(cal.getTimeInMillis()))); // millis converted to whole days
+
+ cal.clear();
+ cal.set(1492, Calendar.AUGUST, 3); // a date before 1970, so the day count passed in is negative
+ assertEquals("1492-08-03", stringifier.stringify((int) MILLISECONDS.toDays(cal.getTimeInMillis())));
+
+ checkThrowingUnsupportedException(stringifier, Integer.TYPE); // every kind other than int must throw
+ }
+
+ @Test
+ public void testTimestampMillisStringifier() { // long inputs are millisecond timestamps; expected output has 3 fractional digits
+ PrimitiveStringifier stringifier = PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER;
+
+ assertEquals("1970-01-01T00:00:00.000", stringifier.stringify(0l)); // value 0 is expected as 1970-01-01T00:00:00.000
+
+ Calendar cal = Calendar.getInstance(UTC); // UTC calendar, independent of the JVM default time zone
+ cal.clear();
+ cal.set(2017, Calendar.DECEMBER, 15, 10, 9, 54);
+ cal.set(Calendar.MILLISECOND, 120);
+ assertEquals("2017-12-15T10:09:54.120", stringifier.stringify(cal.getTimeInMillis()));
+
+ cal.clear();
+ cal.set(1948, Calendar.NOVEMBER, 23, 20, 19, 1); // a timestamp before 1970, so the millis value is negative
+ cal.set(Calendar.MILLISECOND, 9);
+ assertEquals("1948-11-23T20:19:01.009", stringifier.stringify(cal.getTimeInMillis()));
+
+ checkThrowingUnsupportedException(stringifier, Long.TYPE); // every kind other than long must throw
+ }
+
+ @Test
+ public void testTimestampMicrosStringifier() { // long inputs are microsecond timestamps; expected output has 6 fractional digits
+ PrimitiveStringifier stringifier = PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER;
+
+ assertEquals("1970-01-01T00:00:00.000000", stringifier.stringify(0l));
+
+ Calendar cal = Calendar.getInstance(UTC); // UTC calendar, independent of the JVM default time zone
+ cal.clear();
+ cal.set(2053, Calendar.JULY, 10, 22, 13, 24);
+ cal.set(Calendar.MILLISECOND, 84);
+ long micros = cal.getTimeInMillis() * 1000 + 900; // millis scaled to micros, plus 900 extra microseconds
+ assertEquals("2053-07-10T22:13:24.084900", stringifier.stringify(micros));
+
+ cal.clear();
+ cal.set(1848, Calendar.MARCH, 15, 9, 23, 59); // a timestamp before 1970, so the value is negative
+ cal.set(Calendar.MILLISECOND, 765);
+ micros = cal.getTimeInMillis() * 1000 - 1; // one microsecond earlier than 09:23:59.765000
+ assertEquals("1848-03-15T09:23:59.765001", stringifier.stringify(micros)); // NOTE(review): 1 us before .765000 would be .764999; expectation seems to mirror how negative values are currently handled - confirm
+
+ checkThrowingUnsupportedException(stringifier, Long.TYPE); // every kind other than long must throw
+ }
+
+ @Test
+ public void testTimeStringifier() { // int inputs are built from milliseconds, long inputs from microseconds (see convert calls below)
+ PrimitiveStringifier stringifier = TIME_STRINGIFIER;
+
+ assertEquals("00:00:00.000", stringifier.stringify(0)); // int input: 3 fractional digits expected
+ assertEquals("00:00:00.000000", stringifier.stringify(0l)); // long input: 6 fractional digits expected
+
+ assertEquals("12:34:56.789", stringifier.stringify((int) convert(MILLISECONDS, 12, 34, 56, 789)));
+ assertEquals("12:34:56.789012", stringifier.stringify(convert(MICROSECONDS, 12, 34, 56, 789012)));
+
+ assertEquals("-12:34:56.789", stringifier.stringify((int) convert(MILLISECONDS, -12, -34, -56, -789))); // negative values: one leading '-' expected
+ assertEquals("-12:34:56.789012", stringifier.stringify(convert(MICROSECONDS, -12, -34, -56, -789012)));
+
+ assertEquals("123:12:34.567", stringifier.stringify((int) convert(MILLISECONDS, 123, 12, 34, 567))); // hours above 24 are expected to be printed as-is
+ assertEquals("12345:12:34.056789", stringifier.stringify(convert(MICROSECONDS, 12345, 12, 34, 56789))); // fraction is expected zero-padded to 6 digits
+
+ assertEquals("-123:12:34.567", stringifier.stringify((int) convert(MILLISECONDS, -123, -12, -34, -567)));
+ assertEquals("-12345:12:34.056789", stringifier.stringify(convert(MICROSECONDS, -12345, -12, -34, -56789)));
+
+ checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE); // every kind other than int/long must throw
+ }
+
+ private long convert(TimeUnit unit, long hours, long minutes, long seconds, long rest) { // builds a time-of-day value expressed in 'unit'
+ return unit.convert(hours, HOURS) + unit.convert(minutes, MINUTES) + unit.convert(seconds, SECONDS) + rest; // 'rest' is added unconverted, so it must already be in 'unit'
+ }
+
+ @Test
+ public void testDecimalStringifier() { // int, long and Binary inputs are unscaled values; expected output has 4 fractional digits
+ PrimitiveStringifier stringifier = PrimitiveStringifier.createDecimalStringifier(4); // argument 4 is presumably the decimal scale - matches the expectations below
+
+ assertEquals("0.0000", stringifier.stringify(0)); // int inputs
+ assertEquals("123456.7890", stringifier.stringify(1234567890));
+ assertEquals("-98765.4321", stringifier.stringify(-987654321));
+
+ assertEquals("0.0000", stringifier.stringify(0l)); // long inputs
+ assertEquals("123456789012345.6789", stringifier.stringify(1234567890123456789l));
+ assertEquals("-98765432109876.5432", stringifier.stringify(-987654321098765432l));
+
+ assertEquals("null", stringifier.stringify(null));
+ assertEquals("<INVALID>", stringifier.stringify(Binary.EMPTY)); // a zero-length binary is expected as <INVALID>
+ assertEquals("0.0000", stringifier.stringify(Binary.fromReusedByteArray(new byte[] { 0 })));
+ assertEquals("9876543210987654321098765432109876543210987654.3210", stringifier.stringify(Binary // value too large for a long
+ .fromConstantByteArray(new BigInteger("98765432109876543210987654321098765432109876543210").toByteArray()))); // toByteArray() gives big-endian two's-complement bytes
+ assertEquals("-1234567890123456789012345678901234567890123456.7890", stringifier.stringify(Binary
+ .fromConstantByteArray(new BigInteger("-12345678901234567890123456789012345678901234567890").toByteArray())));
+
+ checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE, Binary.class); // float, double and boolean must throw
+ }
+
+ private void checkThrowingUnsupportedException(PrimitiveStringifier stringifier, Class<?>... excludes) { // asserts stringify(...) throws for every value kind not listed in 'excludes'
+ Set<Class<?>> set = new HashSet<>(Arrays.asList(excludes)); // kinds the stringifier supports, so they are not probed
+ if (!set.contains(Integer.TYPE)) {
+ try {
+ stringifier.stringify(0); // int probe
+ fail("An UnsupportedOperationException should have been thrown");
+ } catch (UnsupportedOperationException e) { // expected: intentionally empty
+ }
+ }
+ if (!set.contains(Long.TYPE)) {
+ try {
+ stringifier.stringify(0l); // long probe
+ fail("An UnsupportedOperationException should have been thrown");
+ } catch (UnsupportedOperationException e) { // expected: intentionally empty
+ }
+ }
+ if (!set.contains(Float.TYPE)) {
+ try {
+ stringifier.stringify(0.0f); // float probe
+ fail("An UnsupportedOperationException should have been thrown");
+ } catch (UnsupportedOperationException e) { // expected: intentionally empty
+ }
+ }
+ if (!set.contains(Double.TYPE)) {
+ try {
+ stringifier.stringify(0.0); // double probe
+ fail("An UnsupportedOperationException should have been thrown");
+ } catch (UnsupportedOperationException e) { // expected: intentionally empty
+ }
+ }
+ if (!set.contains(Boolean.TYPE)) {
+ try {
+ stringifier.stringify(false); // boolean probe
+ fail("An UnsupportedOperationException should have been thrown");
+ } catch (UnsupportedOperationException e) { // expected: intentionally empty
+ }
+ }
+ if (!set.contains(Binary.class)) {
+ try {
+ stringifier.stringify(Binary.EMPTY); // Binary probe
+ fail("An UnsupportedOperationException should have been thrown");
+ } catch (UnsupportedOperationException e) { // expected: intentionally empty
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
----------------------------------------------------------------------
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
index 6cb12fa..26b5562 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
@@ -57,6 +57,7 @@ import org.apache.parquet.io.api.Converter;
import org.apache.parquet.io.api.GroupConverter;
import org.apache.parquet.io.api.PrimitiveConverter;
import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.PrimitiveStringifier;
import org.apache.parquet.tools.util.MetadataUtils;
import org.apache.parquet.tools.util.PrettyPrintWriter;
import org.apache.parquet.tools.util.PrettyPrintWriter.WhiteSpaceHandler;
@@ -309,16 +310,29 @@ public class DumpCommand extends ArgsOnlyCommand {
out.format("value %d: R:%d D:%d V:", offset+i, rlvl, dlvl);
if (dlvl == dmax) {
- switch (column.getType()) {
- case BINARY: out.format("%s", binaryToString(creader.getBinary())); break;
- case BOOLEAN: out.format("%s", creader.getBoolean()); break;
- case DOUBLE: out.format("%s", creader.getDouble()); break;
- case FLOAT: out.format("%s", creader.getFloat()); break;
- case INT32: out.format("%s", creader.getInteger()); break;
- case INT64: out.format("%s", creader.getLong()); break;
- case INT96: out.format("%s", binaryToBigInteger(creader.getBinary())); break;
- case FIXED_LEN_BYTE_ARRAY: out.format("%s", binaryToString(creader.getBinary())); break;
- }
+ PrimitiveStringifier stringifier = column.getPrimitiveType().stringifier();
+ switch (column.getType()) {
+ case FIXED_LEN_BYTE_ARRAY:
+ case INT96:
+ case BINARY:
+ out.print(stringifier.stringify(creader.getBinary()));
+ break;
+ case BOOLEAN:
+ out.print(stringifier.stringify(creader.getBoolean()));
+ break;
+ case DOUBLE:
+ out.print(stringifier.stringify(creader.getDouble()));
+ break;
+ case FLOAT:
+ out.print(stringifier.stringify(creader.getFloat()));
+ break;
+ case INT32:
+ out.print(stringifier.stringify(creader.getInteger()));
+ break;
+ case INT64:
+ out.print(stringifier.stringify(creader.getLong()));
+ break;
+ }
} else {
out.format("<null>");
}