You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by bl...@apache.org on 2015/04/28 01:12:33 UTC
[36/51] [partial] parquet-mr git commit: PARQUET-23: Rename to
org.apache.parquet.
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java
new file mode 100644
index 0000000..3603c79
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.schema;
+
+import java.util.Arrays;
+import java.util.StringTokenizer;
+
+import org.apache.parquet.Log;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Type.Repetition;
+import org.apache.parquet.schema.Types.GroupBuilder;
+import org.apache.parquet.schema.Types.PrimitiveBuilder;
+
+/**
+ * Parses a schema from a textual format similar to that described in the Dremel paper.
+ *
+ * @author Julien Le Dem
+ */
+public class MessageTypeParser {
+ private static final Log LOG = Log.getLog(MessageTypeParser.class);
+
+ /**
+ * Splits a schema string into tokens and remembers the text of the line
+ * currently being read so that error messages can point at it.
+ * Whitespace tokens (space, tab, newline) are consumed but never returned.
+ */
+ private static class Tokenizer {
+
+ private final StringTokenizer st;
+
+ // Number of newline tokens consumed so far (so the first line is reported as 0).
+ private int line = 0;
+ // Plain StringBuilder: this helper is only used as a method-local object while parsing.
+ private final StringBuilder currentLine = new StringBuilder();
+
+ /**
+ * @param schemaString the schema text to tokenize
+ * @param string currently ignored: the delimiter set is fixed to
+ * space, comma, semicolon, braces, parentheses, newline, tab and '='
+ */
+ public Tokenizer(String schemaString, String string) {
+ // Delimiters are returned as tokens too (third argument), so callers see "{", ";", "=", ...
+ st = new StringTokenizer(schemaString, " ,;{}()\n\t=", true);
+ }
+
+ /**
+ * @return the next non-whitespace token
+ * @throws IllegalArgumentException if the input is exhausted
+ */
+ public String nextToken() {
+ while (st.hasMoreTokens()) {
+ String t = st.nextToken();
+ if (t.equals("\n")) {
+ // A new line starts: bump the counter and forget the previous line's text.
+ ++ line;
+ currentLine.setLength(0);
+ } else {
+ currentLine.append(t);
+ }
+ if (!isWhitespace(t)) {
+ return t;
+ }
+ }
+ throw new IllegalArgumentException("unexpected end of schema");
+ }
+
+ private boolean isWhitespace(String t) {
+ return t.equals(" ") || t.equals("\t") || t.equals("\n");
+ }
+
+ /**
+ * @return the current line counter followed by the text read so far on that line
+ */
+ public String getLocationString() {
+ return "line " + line + ": " + currentLine;
+ }
+ }
+
+ private MessageTypeParser() {}
+
+ /**
+ *
+ * @param input the text representation of the schema to parse
+ * @return the corresponding object representation
+ */
+ public static MessageType parseMessageType(String input) {
+ return parse(input);
+ }
+
+ /**
+ * Parses a complete schema of the form {@code message <name> { <fields> }}.
+ *
+ * @param schemaString the text representation of the schema
+ * @return the message type built from the parsed fields
+ * @throws IllegalArgumentException if the text does not start with 'message'
+ * or a later token is not what the grammar expects
+ */
+ private static MessageType parse(String schemaString) {
+ // NOTE: the Tokenizer constructor ignores this second argument and always
+ // uses its own delimiter set (which also includes ',' and '=').
+ Tokenizer st = new Tokenizer(schemaString, " ;{}()\n\t");
+ Types.MessageTypeBuilder builder = Types.buildMessage();
+
+ String t = st.nextToken();
+ check(t, "message", "start with 'message'", st);
+ // The token after 'message' is the message name; the one after that must be '{'.
+ String name = st.nextToken();
+ addGroupTypeFields(st.nextToken(), st, builder);
+ return builder.named(name);
+ }
+
+ private static void addGroupTypeFields(String t, Tokenizer st, Types.GroupBuilder builder) {
+ check(t, "{", "start of message", st);
+ while (!(t = st.nextToken()).equals("}")) {
+ addType(t, st, builder);
+ }
+ }
+
+ private static void addType(String t, Tokenizer st, Types.GroupBuilder builder) {
+ Repetition repetition = asRepetition(t, st);
+
+ // Read type.
+ String type = st.nextToken();
+ if ("group".equalsIgnoreCase(type)) {
+ addGroupType(t, st, repetition, builder);
+ } else {
+ addPrimitiveType(t, st, asPrimitive(type, st), repetition, builder);
+ }
+ }
+
+ private static void addGroupType(String t, Tokenizer st, Repetition r, GroupBuilder<?> builder) {
+ GroupBuilder<?> childBuilder = builder.group(r);
+ String name = st.nextToken();
+
+ // Read annotation, if any.
+ t = st.nextToken();
+ OriginalType originalType = null;
+ if (t.equalsIgnoreCase("(")) {
+ originalType = OriginalType.valueOf(st.nextToken());
+ childBuilder.as(originalType);
+ check(st.nextToken(), ")", "original type ended by )", st);
+ t = st.nextToken();
+ }
+ if (t.equals("=")) {
+ childBuilder.id(Integer.parseInt(st.nextToken()));
+ t = st.nextToken();
+ }
+ try {
+ addGroupTypeFields(t, st, childBuilder);
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException("problem reading type: type = group, name = " + name + ", original type = " + originalType, e);
+ }
+
+ childBuilder.named(name);
+ }
+
+ private static void addPrimitiveType(String t, Tokenizer st, PrimitiveTypeName type, Repetition r, Types.GroupBuilder<?> builder) {
+ PrimitiveBuilder<?> childBuilder = builder.primitive(type, r);
+
+ if (type == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
+ t = st.nextToken();
+ // Read type length if the type is fixed_len_byte_array.
+ if (!t.equalsIgnoreCase("(")) {
+ throw new IllegalArgumentException("expecting (length) for field of type fixed_len_byte_array");
+ }
+ childBuilder.length(Integer.parseInt(st.nextToken()));
+ check(st.nextToken(), ")", "type length ended by )", st);
+ }
+
+ String name = st.nextToken();
+
+ // Read annotation, if any.
+ t = st.nextToken();
+ OriginalType originalType = null;
+ if (t.equalsIgnoreCase("(")) {
+ originalType = OriginalType.valueOf(st.nextToken());
+ childBuilder.as(originalType);
+ if (OriginalType.DECIMAL == originalType) {
+ t = st.nextToken();
+ // parse precision and scale
+ if (t.equalsIgnoreCase("(")) {
+ childBuilder.precision(Integer.parseInt(st.nextToken()));
+ t = st.nextToken();
+ if (t.equalsIgnoreCase(",")) {
+ childBuilder.scale(Integer.parseInt(st.nextToken()));
+ t = st.nextToken();
+ }
+ check(t, ")", "decimal type ended by )", st);
+ t = st.nextToken();
+ }
+ } else {
+ t = st.nextToken();
+ }
+ check(t, ")", "original type ended by )", st);
+ t = st.nextToken();
+ }
+ if (t.equals("=")) {
+ childBuilder.id(Integer.parseInt(st.nextToken()));
+ t = st.nextToken();
+ }
+ check(t, ";", "field ended by ';'", st);
+
+ try {
+ childBuilder.named(name);
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException("problem reading type: type = " + type + ", name = " + name + ", original type = " + originalType, e);
+ }
+ }
+
+ private static PrimitiveTypeName asPrimitive(String t, Tokenizer st) {
+ try {
+ return PrimitiveTypeName.valueOf(t.toUpperCase());
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException("expected one of " + Arrays.toString(PrimitiveTypeName.values()) +" got " + t + " at " + st.getLocationString(), e);
+ }
+ }
+
+ private static Repetition asRepetition(String t, Tokenizer st) {
+ try {
+ return Repetition.valueOf(t.toUpperCase());
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException("expected one of " + Arrays.toString(Repetition.values()) +" got " + t + " at " + st.getLocationString(), e);
+ }
+ }
+
+ private static void check(String t, String expected, String message, Tokenizer tokenizer) {
+ if (!t.equalsIgnoreCase(expected)) {
+ throw new IllegalArgumentException(message+ ": expected '" + expected + "' but got '" + t + "' at " + tokenizer.getLocationString());
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java
new file mode 100644
index 0000000..0ea89ba
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.schema;
+
+public enum OriginalType {
+ MAP,
+ LIST,
+ UTF8,
+ MAP_KEY_VALUE,
+ ENUM,
+ DECIMAL,
+ DATE,
+ TIME_MILLIS,
+ TIMESTAMP_MILLIS,
+ UINT_8,
+ UINT_16,
+ UINT_32,
+ UINT_64,
+ INT_8,
+ INT_16,
+ INT_32,
+ INT_64,
+ JSON,
+ BSON,
+ INTERVAL;
+}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
new file mode 100644
index 0000000..e8d98c0
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
@@ -0,0 +1,530 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.schema;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.parquet.column.ColumnReader;
+import org.apache.parquet.io.InvalidRecordException;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.PrimitiveConverter;
+import org.apache.parquet.io.api.RecordConsumer;
+
+
+/**
+ *
+ * Representation of a Primitive type
+ *
+ * @author Julien Le Dem
+ *
+ */
+public final class PrimitiveType extends Type {
+
+ public static interface PrimitiveTypeNameConverter<T, E extends Exception> {
+
+ T convertFLOAT(PrimitiveTypeName primitiveTypeName) throws E;
+
+ T convertDOUBLE(PrimitiveTypeName primitiveTypeName) throws E;
+
+ T convertINT32(PrimitiveTypeName primitiveTypeName) throws E;
+
+ T convertINT64(PrimitiveTypeName primitiveTypeName) throws E;
+
+ T convertINT96(PrimitiveTypeName primitiveTypeName) throws E;
+
+ T convertFIXED_LEN_BYTE_ARRAY(PrimitiveTypeName primitiveTypeName) throws E;
+
+ T convertBOOLEAN(PrimitiveTypeName primitiveTypeName) throws E;
+
+ T convertBINARY(PrimitiveTypeName primitiveTypeName) throws E;
+
+ }
+
+ /**
+ * Supported Primitive types
+ *
+ * @author Julien Le Dem
+ */
+ public static enum PrimitiveTypeName {
+ INT64("getLong", Long.TYPE) {
+ @Override
+ public String toString(ColumnReader columnReader) {
+ return String.valueOf(columnReader.getLong());
+ }
+
+ @Override
+ public void addValueToRecordConsumer(RecordConsumer recordConsumer,
+ ColumnReader columnReader) {
+ recordConsumer.addLong(columnReader.getLong());
+ }
+
+ @Override
+ public void addValueToPrimitiveConverter(
+ PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
+ primitiveConverter.addLong(columnReader.getLong());
+ }
+
+ @Override
+ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
+ return converter.convertINT64(this);
+ }
+ },
+ INT32("getInteger", Integer.TYPE) {
+ @Override
+ public String toString(ColumnReader columnReader) {
+ return String.valueOf(columnReader.getInteger());
+ }
+
+ @Override
+ public void addValueToRecordConsumer(RecordConsumer recordConsumer,
+ ColumnReader columnReader) {
+ recordConsumer.addInteger(columnReader.getInteger());
+ }
+
+ @Override
+ public void addValueToPrimitiveConverter(
+ PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
+ primitiveConverter.addInt(columnReader.getInteger());
+ }
+
+ @Override
+ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
+ return converter.convertINT32(this);
+ }
+ },
+ BOOLEAN("getBoolean", Boolean.TYPE) {
+ @Override
+ public String toString(ColumnReader columnReader) {
+ return String.valueOf(columnReader.getBoolean());
+ }
+
+ @Override
+ public void addValueToRecordConsumer(RecordConsumer recordConsumer,
+ ColumnReader columnReader) {
+ recordConsumer.addBoolean(columnReader.getBoolean());
+ }
+
+ @Override
+ public void addValueToPrimitiveConverter(
+ PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
+ primitiveConverter.addBoolean(columnReader.getBoolean());
+ }
+
+ @Override
+ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
+ return converter.convertBOOLEAN(this);
+ }
+ },
+ BINARY("getBinary", Binary.class) {
+ @Override
+ public String toString(ColumnReader columnReader) {
+ return String.valueOf(columnReader.getBinary());
+ }
+
+ @Override
+ public void addValueToRecordConsumer(RecordConsumer recordConsumer,
+ ColumnReader columnReader) {
+ recordConsumer.addBinary(columnReader.getBinary());
+ }
+
+ @Override
+ public void addValueToPrimitiveConverter(
+ PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
+ primitiveConverter.addBinary(columnReader.getBinary());
+ }
+
+ @Override
+ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
+ return converter.convertBINARY(this);
+ }
+ },
+ FLOAT("getFloat", Float.TYPE) {
+ @Override
+ public String toString(ColumnReader columnReader) {
+ return String.valueOf(columnReader.getFloat());
+ }
+
+ @Override
+ public void addValueToRecordConsumer(RecordConsumer recordConsumer,
+ ColumnReader columnReader) {
+ recordConsumer.addFloat(columnReader.getFloat());
+ }
+
+ @Override
+ public void addValueToPrimitiveConverter(
+ PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
+ primitiveConverter.addFloat(columnReader.getFloat());
+ }
+
+ @Override
+ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
+ return converter.convertFLOAT(this);
+ }
+ },
+ DOUBLE("getDouble", Double.TYPE) {
+ @Override
+ public String toString(ColumnReader columnReader) {
+ return String.valueOf(columnReader.getDouble());
+ }
+
+ @Override
+ public void addValueToRecordConsumer(RecordConsumer recordConsumer,
+ ColumnReader columnReader) {
+ recordConsumer.addDouble(columnReader.getDouble());
+ }
+
+ @Override
+ public void addValueToPrimitiveConverter(
+ PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
+ primitiveConverter.addDouble(columnReader.getDouble());
+ }
+
+ @Override
+ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
+ return converter.convertDOUBLE(this);
+ }
+ },
+ INT96("getBinary", Binary.class) {
+ @Override
+ public String toString(ColumnReader columnReader) {
+ return Arrays.toString(columnReader.getBinary().getBytes());
+ }
+ @Override
+ public void addValueToRecordConsumer(RecordConsumer recordConsumer,
+ ColumnReader columnReader) {
+ recordConsumer.addBinary(columnReader.getBinary());
+ }
+ @Override
+ public void addValueToPrimitiveConverter(
+ PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
+ primitiveConverter.addBinary(columnReader.getBinary());
+ }
+
+ @Override
+ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
+ return converter.convertINT96(this);
+ }
+ },
+ FIXED_LEN_BYTE_ARRAY("getBinary", Binary.class) {
+ @Override
+ public String toString(ColumnReader columnReader) {
+ return String.valueOf(columnReader.getBinary());
+ }
+
+ @Override
+ public void addValueToRecordConsumer(RecordConsumer recordConsumer,
+ ColumnReader columnReader) {
+ recordConsumer.addBinary(columnReader.getBinary());
+ }
+
+ @Override
+ public void addValueToPrimitiveConverter(
+ PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
+ primitiveConverter.addBinary(columnReader.getBinary());
+ }
+
+ @Override
+ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E {
+ return converter.convertFIXED_LEN_BYTE_ARRAY(this);
+ }
+ };
+
+ public final String getMethod;
+ public final Class<?> javaType;
+
+ private PrimitiveTypeName(String getMethod, Class<?> javaType) {
+ this.getMethod = getMethod;
+ this.javaType = javaType;
+ }
+
+ /**
+ * reads the value from the columnReader with the appropriate accessor and returns a String representation
+ * @param columnReader
+ * @return a string
+ */
+ abstract public String toString(ColumnReader columnReader);
+
+ /**
+ * reads the value from the columnReader with the appropriate accessor and writes it to the recordConsumer
+ * @param recordConsumer where to write
+ * @param columnReader where to read from
+ */
+ abstract public void addValueToRecordConsumer(RecordConsumer recordConsumer,
+ ColumnReader columnReader);
+
+ abstract public void addValueToPrimitiveConverter(
+ PrimitiveConverter primitiveConverter, ColumnReader columnReader);
+
+ abstract public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E;
+
+ }
+
+ private final PrimitiveTypeName primitive;
+ private final int length;
+ private final DecimalMetadata decimalMeta;
+
+ /**
+ * @param repetition OPTIONAL, REPEATED, REQUIRED
+ * @param primitive STRING, INT64, ...
+ * @param name the name of the type
+ */
+ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
+ String name) {
+ this(repetition, primitive, 0, name, null, null, null);
+ }
+
+ /**
+ * @param repetition OPTIONAL, REPEATED, REQUIRED
+ * @param primitive STRING, INT64, ...
+ * @param length the length if the type is FIXED_LEN_BYTE_ARRAY, 0 otherwise (XXX)
+ * @param name the name of the type
+ */
+ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, int length, String name) {
+ this(repetition, primitive, length, name, null, null, null);
+ }
+
+ /**
+ * Creates a primitive type with an original type annotation, a type length
+ * of 0, and no decimal metadata or id.
+ *
+ * @param repetition OPTIONAL, REPEATED, REQUIRED
+ * @param primitive BINARY, INT64, ...
+ * @param name the name of the type
+ * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
+ */
+ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
+ String name, OriginalType originalType) {
+ this(repetition, primitive, 0, name, originalType, null, null);
+ }
+
+ /**
+ * @param repetition OPTIONAL, REPEATED, REQUIRED
+ * @param primitive STRING, INT64, ...
+ * @param name the name of the type
+ * @param length the length if the type is FIXED_LEN_BYTE_ARRAY, 0 otherwise (XXX)
+ * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
+ */
+ @Deprecated
+ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
+ int length, String name, OriginalType originalType) {
+ this(repetition, primitive, length, name, originalType, null, null);
+ }
+
+ /**
+ * @param repetition OPTIONAL, REPEATED, REQUIRED
+ * @param primitive STRING, INT64, ...
+ * @param name the name of the type
+ * @param length the length if the type is FIXED_LEN_BYTE_ARRAY, 0 otherwise
+ * @param originalType (optional) the original type (MAP, DECIMAL, UTF8, ...)
+ * @param decimalMeta (optional) metadata about the decimal type
+ * @param id the id of the field
+ */
+ PrimitiveType(
+ Repetition repetition, PrimitiveTypeName primitive,
+ int length, String name, OriginalType originalType,
+ DecimalMetadata decimalMeta, ID id) {
+ super(name, repetition, originalType, id);
+ this.primitive = primitive;
+ this.length = length;
+ this.decimalMeta = decimalMeta;
+ }
+
+ /**
+ * @param id the field id
+ * @return a new PrimitiveType with the same fields and a new id
+ */
+ @Override
+ public PrimitiveType withId(int id) {
+ return new PrimitiveType(getRepetition(), primitive, length, getName(), getOriginalType(), decimalMeta, new ID(id));
+ }
+
+ /**
+ * @return the primitive type
+ */
+ public PrimitiveTypeName getPrimitiveTypeName() {
+ return primitive;
+ }
+
+ /**
+ * @return the type length
+ */
+ public int getTypeLength() {
+ return length;
+ }
+
+ /**
+ * @return the decimal type metadata
+ */
+ public DecimalMetadata getDecimalMetadata() {
+ return decimalMeta;
+ }
+
+ /**
+ * @return true
+ */
+ @Override
+ public boolean isPrimitive() {
+ return true;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void accept(TypeVisitor visitor) {
+ visitor.visit(this);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void writeToStringBuilder(StringBuilder sb, String indent) {
+ sb.append(indent)
+ .append(getRepetition().name().toLowerCase())
+ .append(" ")
+ .append(primitive.name().toLowerCase());
+ if (primitive == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
+ sb.append("(" + length + ")");
+ }
+ sb.append(" ").append(getName());
+ if (getOriginalType() != null) {
+ sb.append(" (").append(getOriginalType());
+ DecimalMetadata meta = getDecimalMetadata();
+ if (meta != null) {
+ sb.append("(")
+ .append(meta.getPrecision())
+ .append(",")
+ .append(meta.getScale())
+ .append(")");
+ }
+ sb.append(")");
+ }
+ if (getId() != null) {
+ sb.append(" = ").append(getId());
+ }
+ }
+
+ @Override @Deprecated
+ protected int typeHashCode() {
+ return hashCode();
+ }
+
+ @Override @Deprecated
+ protected boolean typeEquals(Type other) {
+ return equals(other);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ protected boolean equals(Type other) {
+ if (!other.isPrimitive()) {
+ return false;
+ }
+ PrimitiveType otherPrimitive = other.asPrimitiveType();
+ return super.equals(other)
+ && primitive == otherPrimitive.getPrimitiveTypeName()
+ && length == otherPrimitive.length
+ && eqOrBothNull(decimalMeta, otherPrimitive.decimalMeta);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public int hashCode() {
+ int hash = super.hashCode();
+ hash = hash * 31 + primitive.hashCode();
+ hash = hash * 31 + length;
+ if (decimalMeta != null) {
+ hash = hash * 31 + decimalMeta.hashCode();
+ }
+ return hash;
+ }
+
+ @Override
+ public int getMaxRepetitionLevel(String[] path, int i) {
+ if (path.length != i) {
+ throw new InvalidRecordException("Arrived at primitive node, path invalid");
+ }
+ return isRepetition(Repetition.REPEATED)? 1 : 0;
+ }
+
+ @Override
+ public int getMaxDefinitionLevel(String[] path, int i) {
+ if (path.length != i) {
+ throw new InvalidRecordException("Arrived at primitive node, path invalid");
+ }
+ return isRepetition(Repetition.REQUIRED) ? 0 : 1;
+ }
+
+ @Override
+ public Type getType(String[] path, int i) {
+ if (path.length != i) {
+ throw new InvalidRecordException("Arrived at primitive node at index " + i + " , path invalid: " + Arrays.toString(path));
+ }
+ return this;
+ }
+
+ @Override
+ protected List<String[]> getPaths(int depth) {
+ return Arrays.<String[]>asList(new String[depth]);
+ }
+
+ @Override
+ void checkContains(Type subType) {
+ super.checkContains(subType);
+ if (!subType.isPrimitive()) {
+ throw new InvalidRecordException(subType + " found: expected " + this);
+ }
+ PrimitiveType primitiveType = subType.asPrimitiveType();
+ if (this.primitive != primitiveType.primitive) {
+ throw new InvalidRecordException(subType + " found: expected " + this);
+ }
+
+ }
+
+ @Override
+ public <T> T convert(List<GroupType> path, TypeConverter<T> converter) {
+ return converter.convertPrimitiveType(path, this);
+ }
+
+ @Override
+ protected boolean containsPath(String[] path, int depth) {
+ return path.length == depth;
+ }
+
+ @Override
+ protected Type union(Type toMerge) {
+ return union(toMerge, true);
+ }
+
+ @Override
+ protected Type union(Type toMerge, boolean strict) {
+ if (!toMerge.isPrimitive() || (strict && !primitive.equals(toMerge.asPrimitiveType().getPrimitiveTypeName()))) {
+ throw new IncompatibleSchemaModificationException("can not merge type " + toMerge + " into " + this);
+ }
+ Types.PrimitiveBuilder<PrimitiveType> builder = Types.primitive(
+ primitive, toMerge.getRepetition());
+ if (PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY == primitive) {
+ builder.length(length);
+ }
+ return builder.named(getName());
+ }
+}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
new file mode 100644
index 0000000..99222f9
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
@@ -0,0 +1,317 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.schema;
+
+import static org.apache.parquet.Preconditions.checkNotNull;
+
+import java.util.List;
+
+import org.apache.parquet.io.InvalidRecordException;
+
+/**
+ * Represents the declared type for a field in a schema.
+ * The Type object represents both the actual underlying type of the object
+ * (eg a primitive or group) as well as its attributes such as whether it is
+ * repeated, required, or optional.
+ */
+abstract public class Type {
+
+ /**
+ * represents a field ID
+ *
+ * @author Julien Le Dem
+ *
+ */
+ public static final class ID {
+ private final int id;
+
+ public ID(int id) {
+ this.id = id;
+ }
+
+ public int intValue() {
+ return id;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ return (obj instanceof ID) && ((ID)obj).id == id;
+ }
+
+ @Override
+ public int hashCode() {
+ return id;
+ }
+
+ @Override
+ public String toString() {
+ return String.valueOf(id);
+ }
+ }
+
+ /**
+ * Constraint on the repetition of a field
+ *
+ * @author Julien Le Dem
+ */
+ public static enum Repetition {
+ /**
+ * exactly 1
+ */
+ REQUIRED {
+ @Override
+ public boolean isMoreRestrictiveThan(Repetition other) {
+ return other != REQUIRED;
+ }
+ },
+ /**
+ * 0 or 1
+ */
+ OPTIONAL {
+ @Override
+ public boolean isMoreRestrictiveThan(Repetition other) {
+ return other == REPEATED;
+ }
+ },
+ /**
+ * 0 or more
+ */
+ REPEATED {
+ @Override
+ public boolean isMoreRestrictiveThan(Repetition other) {
+ return false;
+ }
+ }
+ ;
+
+ /**
+ * @param other
+ * @return true if it is strictly more restrictive than other
+ */
+ abstract public boolean isMoreRestrictiveThan(Repetition other);
+
+ }
+
+ private final String name;
+ private final Repetition repetition;
+ private final OriginalType originalType;
+ private final ID id;
+
+ /**
+ * @param name the name of the type
+ * @param repetition OPTIONAL, REPEATED, REQUIRED
+ */
+ @Deprecated
+ public Type(String name, Repetition repetition) {
+ this(name, repetition, null, null);
+ }
+
+ /**
+ * @param name the name of the type
+ * @param repetition OPTIONAL, REPEATED, REQUIRED
+ * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
+ */
+ @Deprecated
+ public Type(String name, Repetition repetition, OriginalType originalType) {
+ this(name, repetition, originalType, null);
+ }
+
+ /**
+ * @param name the name of the type
+ * @param repetition OPTIONAL, REPEATED, REQUIRED
+ * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
+ * @param id (optional) the id of the fields.
+ */
+ Type(String name, Repetition repetition, OriginalType originalType, ID id) {
+ super();
+ this.name = checkNotNull(name, "name");
+ this.repetition = checkNotNull(repetition, "repetition");
+ this.originalType = originalType;
+ this.id = id;
+ }
+
+ /**
+ * @param id
+ * @return the same type with the id field set
+ */
+ public abstract Type withId(int id);
+
+ /**
+ * @return the name of the type
+ */
+ public String getName() {
+ return name;
+ }
+
+ /**
+ * @param rep
+ * @return if repetition of the type is rep
+ */
+ public boolean isRepetition(Repetition rep) {
+ return repetition == rep;
+ }
+
+ /**
+ * @return the repetition constraint
+ */
+ public Repetition getRepetition() {
+ return repetition;
+ }
+
+ /**
+ * @return the id of the field (if defined)
+ */
+ public ID getId() {
+ return id;
+ }
+
+ /**
+ * @return the original type (LIST, MAP, ...)
+ */
+ public OriginalType getOriginalType() {
+ return originalType;
+ }
+
+ /**
+ * @return if this is a primitive type
+ */
+ abstract public boolean isPrimitive();
+
+ /**
+ * @return this if it's a group type
+ * @throws ClassCastException if not
+ */
+ public GroupType asGroupType() {
+ if (isPrimitive()) {
+ throw new ClassCastException(this + " is not a group");
+ }
+ return (GroupType)this;
+ }
+
+ /**
+ * @return this if it's a primitive type
+ * @throws ClassCastException if not
+ */
+ public PrimitiveType asPrimitiveType() {
+ if (!isPrimitive()) {
+ throw new ClassCastException(this + " is not primitive");
+ }
+ return (PrimitiveType)this;
+ }
+
+ /**
+ * Writes a string representation to the provided StringBuilder
+ * @param sb the StringBuilder to write itself to
+ * @param indent indentation level
+ */
+ abstract public void writeToStringBuilder(StringBuilder sb, String indent);
+
+ /**
+ * Visits this type with the given visitor
+ * @param visitor the visitor to visit this type
+ */
+ abstract public void accept(TypeVisitor visitor);
+
+ @Deprecated
+ abstract protected int typeHashCode();
+
+ @Deprecated
+ abstract protected boolean typeEquals(Type other);
+
+ @Override
+ public int hashCode() {
+ int c = repetition.hashCode();
+ c = 31 * c + name.hashCode();
+ if (originalType != null) {
+ c = 31 * c + originalType.hashCode();
+ }
+ if (id != null) {
+ c = 31 * c + id.hashCode();
+ }
+ return c;
+ }
+
+ /**
+ * Compares the attributes held by this base class: name, repetition,
+ * original type and id. PrimitiveType layers its own fields on top of this check.
+ *
+ * @param other the type to compare with; must not be null
+ * @return true if all base attributes are equal
+ */
+ protected boolean equals(Type other) {
+ return
+ name.equals(other.name)
+ && repetition == other.repetition
+ // originalType is part of hashCode(), so it has to be part of equality as well
+ && eqOrBothNull(originalType, other.originalType)
+ && eqOrBothNull(id, other.id);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public boolean equals(Object other) {
+ // instanceof is already false for null, so a separate null test adds nothing
+ if (other instanceof Type) {
+ return equals((Type)other);
+ }
+ return false;
+ }
+
+ protected boolean eqOrBothNull(Object o1, Object o2) {
+ return (o1 == null && o2 == null) || (o1 != null && o1.equals(o2));
+ }
+
+ protected abstract int getMaxRepetitionLevel(String[] path, int i);
+
+ protected abstract int getMaxDefinitionLevel(String[] path, int i);
+
+ protected abstract Type getType(String[] path, int i);
+
+ protected abstract List<String[]> getPaths(int depth);
+
+ protected abstract boolean containsPath(String[] path, int depth);
+
+ /**
+ * @param toMerge the type to merge into this one
+ * @return the union result of merging toMerge into this
+ */
+ protected abstract Type union(Type toMerge);
+
+ /**
+ * @param toMerge the type to merge into this one
+ * @param strict should schema primitive types match
+ * @return the union result of merging toMerge into this
+ */
+ protected abstract Type union(Type toMerge, boolean strict);
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ writeToStringBuilder(sb, "");
+ return sb.toString();
+ }
+
+ void checkContains(Type subType) {
+ if (!this.name.equals(subType.name)
+ || this.repetition != subType.repetition) {
+ throw new InvalidRecordException(subType + " found: expected " + this);
+ }
+ }
+
+ /**
+ * Converts this type using the given converter.
+ *
+ * @param path the group types passed on to the converter together with this node
+ * (presumably the path from the root to this node; confirm against implementations)
+ * @param converter logic to convert the tree
+ * @param <T> the type produced by the converter
+ * @return the converted tree
+ */
+ abstract <T> T convert(List<GroupType> path, TypeConverter<T> converter);
+
+}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/schema/TypeConverter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/TypeConverter.java b/parquet-column/src/main/java/org/apache/parquet/schema/TypeConverter.java
new file mode 100644
index 0000000..c22a877
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/TypeConverter.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.schema;
+
+import java.util.List;
+
+/**
+ * Callback interface used to convert a MessageType tree, node by node.
+ * @see Type#convert(List, TypeConverter)
+ *
+ * @author Julien Le Dem
+ *
+ * @param <T> the type of the conversion result
+ */
+public interface TypeConverter<T> {
+
+ /** Converts a primitive type.
+ * @param path the path to that node
+ * @param primitiveType the type to convert
+ * @return the result of conversion
+ */
+ T convertPrimitiveType(List<GroupType> path, PrimitiveType primitiveType);
+
+ /** Converts a group type whose children have already been converted.
+ * @param path the path to that node
+ * @param groupType the type to convert
+ * @param children its children already converted
+ * @return the result of conversion
+ */
+ T convertGroupType(List<GroupType> path, GroupType groupType, List<T> children);
+
+ /** Converts a message type whose children have already been converted.
+ * @param messageType the type to convert
+ * @param children its children already converted
+ * @return the result of conversion
+ */
+ T convertMessageType(MessageType messageType, List<T> children);
+
+}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/schema/TypeVisitor.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/TypeVisitor.java b/parquet-column/src/main/java/org/apache/parquet/schema/TypeVisitor.java
new file mode 100644
index 0000000..bd7a548
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/TypeVisitor.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.schema;
+
+/**
+ * Implement this interface to visit a schema, for example:
+ *
+ * <pre>type.accept(new TypeVisitor() { ... });</pre>
+ *
+ * @author Julien Le Dem
+ *
+ */
+public interface TypeVisitor {
+
+ /**
+ * @param groupType the group type to visit
+ */
+ void visit(GroupType groupType);
+
+ /**
+ * @param messageType the message type to visit
+ */
+ void visit(MessageType messageType);
+
+ /**
+ * @param primitiveType the primitive type to visit
+ */
+ void visit(PrimitiveType primitiveType);
+
+}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
new file mode 100644
index 0000000..b06c2bc
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
@@ -0,0 +1,668 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.schema;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.parquet.Preconditions;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Type.ID;
+
+/**
+ * This class provides fluent builders that produce Parquet schema Types.
+ * <p>
+ * The most basic use is to build primitive types:
+ * <pre>
+ * Types.required(INT64).named("id");
+ * Types.optional(INT32).named("number");
+ * </pre>
+ * <p>
+ * The {@link #required(PrimitiveTypeName)} factory method produces a primitive
+ * type builder, and the {@link PrimitiveBuilder#named(String)} builds the
+ * {@link PrimitiveType}. Between {@code required} and {@code named}, other
+ * builder methods can be used to add type annotations or other type metadata:
+ * <pre>
+ * Types.required(BINARY).as(UTF8).named("username");
+ * Types.optional(FIXED_LEN_BYTE_ARRAY).length(20).named("sha1");
+ * </pre>
+ * <p>
+ * Optional types are built using {@link #optional(PrimitiveTypeName)} to get
+ * the builder.
+ * <p>
+ * Groups are built similarly, using {@code requiredGroup()} (or the optional
+ * version) to return a group builder. Group builders provide {@code required}
+ * and {@code optional} to add primitive types, which return primitive builders
+ * like the versions above.
+ * <pre>
+ * // This produces:
+ * // required group User {
+ * // required int64 id;
+ * // optional binary email (UTF8);
+ * // }
+ * Types.requiredGroup()
+ * .required(INT64).named("id")
+ * .optional(BINARY).as(UTF8).named("email")
+ * .named("User")
+ * </pre>
+ * <p>
+ * When {@code required} is called on a group builder, the builder it returns
+ * will add the type to the parent group when it is built and {@code named} will
+ * return its parent group builder (instead of the type) so more fields can be
+ * added.
+ * <p>
+ * Sub-groups can be created using {@code requiredGroup()} to get a group
+ * builder that will create the group type, add it to the parent builder, and
+ * return the parent builder for more fields.
+ * <pre>
+ * // required group User {
+ * // required int64 id;
+ * // optional binary email (UTF8);
+ * // optional group address {
+ * // required binary street (UTF8);
+ * // required int32 zipcode;
+ * // }
+ * // }
+ * Types.requiredGroup()
+ * .required(INT64).named("id")
+ * .optional(BINARY).as(UTF8).named("email")
+ * .optionalGroup()
+ * .required(BINARY).as(UTF8).named("street")
+ * .required(INT32).named("zipcode")
+ * .named("address")
+ * .named("User")
+ * </pre>
+ * <p>
+ * Message types are built using {@link #buildMessage()} and function just like
+ * group builders.
+ * <pre>
+ * // message User {
+ * // required int64 id;
+ * // optional binary email (UTF8);
+ * // optional group address {
+ * // required binary street (UTF8);
+ * // required int32 zipcode;
+ * // }
+ * // }
+ * Types.buildMessage()
+ * .required(INT64).named("id")
+ * .optional(BINARY).as(UTF8).named("email")
+ * .optionalGroup()
+ * .required(BINARY).as(UTF8).named("street")
+ * .required(INT32).named("zipcode")
+ * .named("address")
+ * .named("User")
+ * </pre>
+ * <p>
+ * These builders enforce consistency checks based on the specifications in
+ * the parquet-format documentation. For example, if DECIMAL is used to annotate
+ * a FIXED_LEN_BYTE_ARRAY that is not long enough for its maximum precision,
+ * these builders will throw an IllegalArgumentException:
+ * <pre>
+ * // throws IllegalArgumentException with message:
+ * // "FIXED(4) cannot store 10 digits (max 9)"
+ * Types.required(FIXED_LEN_BYTE_ARRAY).length(4)
+ * .as(DECIMAL).precision(10)
+ * .named("badDecimal");
+ * </pre>
+ */
+public class Types {
+ private static final int NOT_SET = 0;
+
+ /**
+ * A base builder for {@link Type} objects.
+ * @param <T> The concrete builder type, returned by the chaining methods.
+ * @param <P> The type that this builder will return from
+ * {@link #named(String)} when the type is built.
+ */
+ public abstract static class Builder<T extends Builder, P> {
+ protected final P parent;
+ protected final Class<? extends P> returnClass;
+
+ protected Type.Repetition repetition = null;
+ protected OriginalType originalType = null;
+ protected Type.ID id = null;
+ private boolean repetitionAlreadySet = false;
+
+ /**
+ * Construct a type builder that returns a "parent" object when the builder
+ * is finished. The {@code parent} will be returned by
+ * {@link #named(String)} so that builders can be chained.
+ *
+ * @param parent a non-null object to return from {@link #named(String)}
+ */
+ protected Builder(P parent) {
+ Preconditions.checkNotNull(parent, "Parent cannot be null");
+ this.parent = parent;
+ this.returnClass = null;
+ }
+
+ /**
+ * Construct a type builder that returns the {@link Type} that was built
+ * when the builder is finished. The {@code returnClass} must be the
+ * expected {@code Type} class.
+ *
+ * @param returnClass a {@code Type} to return from {@link #named(String)}
+ */
+ protected Builder(Class<P> returnClass) {
+ Preconditions.checkArgument(Type.class.isAssignableFrom(returnClass),
+ "The requested return class must extend Type");
+ this.returnClass = returnClass;
+ this.parent = null;
+ }
+
+ protected abstract T self();
+
+ protected final T repetition(Type.Repetition repetition) {
+ Preconditions.checkArgument(!repetitionAlreadySet,
+ "Repetition has already been set");
+ Preconditions.checkNotNull(repetition, "Repetition cannot be null");
+ this.repetition = repetition;
+ this.repetitionAlreadySet = true;
+ return self();
+ }
+
+ /**
+ * Adds a type annotation ({@link OriginalType}) to the type being built.
+ * <p>
+ * Type annotations are used to extend the types that parquet can store, by
+ * specifying how the primitive types should be interpreted. This keeps the
+ * set of primitive types to a minimum and reuses parquet's efficient
+ * encodings. For example, strings are stored as byte arrays (binary) with
+ * a UTF8 annotation.
+ *
+ * @param type an {@code OriginalType}
+ * @return this builder for method chaining
+ */
+ public T as(OriginalType type) {
+ this.originalType = type;
+ return self();
+ }
+
+ /**
+ * adds an id annotation to the type being built.
+ * <p>
+ * ids are used to capture the original id when converting from models using ids (thrift, protobufs)
+ *
+ * @param id the id of the field
+ * @return this builder for method chaining
+ */
+ public T id(int id) {
+ this.id = new ID(id);
+ return self();
+ }
+
+ abstract protected Type build(String name);
+
+ /**
+ * Finishes this builder: creates the {@link Type} called {@code name} and
+ * returns either the parent object, when one was supplied, or the new
+ * {@code Type} itself. A parent that is a GroupBuilder receives the type as a field.
+ * <p>
+ * <em>Note:</em> Configure this builder completely before calling this
+ * method; a name and a repetition are both required.
+ *
+ * @param name a name for the constructed type
+ * @return the parent object, if any, otherwise the constructed {@code Type}
+ */
+ public P named(String name) {
+   Preconditions.checkNotNull(name, "Name is required");
+   Preconditions.checkNotNull(repetition, "Repetition is required");
+
+   final Type built = build(name);
+   if (parent == null) {
+     // Stand-alone builder: hand back the type itself. The constructor
+     // already verified that returnClass is a Type class.
+     return returnClass.cast(built);
+   }
+
+   if (parent instanceof GroupBuilder) {
+     // Nested builder: register the new type as a field of the enclosing group.
+     ((GroupBuilder<?>) parent).addField(built);
+   }
+   return parent;
+ }
+
+ }
+
+ /**
+ * A builder for {@link PrimitiveType} objects.
+ *
+ * @param <P> The type that this builder will return from
+ * {@link #named(String)} when the type is built.
+ */
+ public static class PrimitiveBuilder<P> extends Builder<PrimitiveBuilder<P>, P> {
+ private static final long MAX_PRECISION_INT32 = maxPrecision(4);
+ private static final long MAX_PRECISION_INT64 = maxPrecision(8);
+ private final PrimitiveTypeName primitiveType;
+ private int length = NOT_SET;
+ private int precision = NOT_SET;
+ private int scale = NOT_SET;
+
+ private PrimitiveBuilder(P parent, PrimitiveTypeName type) {
+ super(parent);
+ this.primitiveType = type;
+ }
+
+ private PrimitiveBuilder(Class<P> returnType, PrimitiveTypeName type) {
+ super(returnType);
+ this.primitiveType = type;
+ }
+
+ @Override
+ protected PrimitiveBuilder<P> self() {
+ return this;
+ }
+
+ /**
+ * Adds the length for a FIXED_LEN_BYTE_ARRAY.
+ *
+ * @param length an int length
+ * @return this builder for method chaining
+ */
+ public PrimitiveBuilder<P> length(int length) {
+ this.length = length;
+ return this;
+ }
+
+ /**
+ * Adds the precision for a DECIMAL.
+ * <p>
+ * This value is required for decimals and must be less than or equal to
+ * the maximum number of base-10 digits in the underlying type. A 4-byte
+ * fixed, for example, can store up to 9 base-10 digits.
+ *
+ * @param precision an int precision value for the DECIMAL
+ * @return this builder for method chaining
+ */
+ public PrimitiveBuilder<P> precision(int precision) {
+ this.precision = precision;
+ return this;
+ }
+
+ /**
+ * Adds the scale for a DECIMAL.
+ * <p>
+ * This value must not be negative and must not be greater than the precision
+ * (checked when the type is built). If not set, the default scale is 0.
+ * <p>
+ * The scale specifies the number of digits of the underlying unscaled
+ * value that are to the right of the decimal point. The decimal interpretation
+ * of values in this column is: {@code value*10^(-scale)}.
+ *
+ * @param scale an int scale value for the DECIMAL
+ * @return this builder for method chaining
+ */
+ public PrimitiveBuilder<P> scale(int scale) {
+ this.scale = scale;
+ return this;
+ }
+
+ @Override
+ protected PrimitiveType build(String name) { // validates length, decimal metadata and annotation, then creates the type
+ if (PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY == primitiveType) {
+ Preconditions.checkArgument(length > 0,
+ "Invalid FIXED_LEN_BYTE_ARRAY length: " + length);
+ }
+
+ DecimalMetadata meta = decimalMetadata(); // null unless the annotation is DECIMAL
+
+ // check that the annotation, if any, is allowed on this primitive type
+ if (originalType != null) {
+ switch (originalType) {
+ case UTF8:
+ case JSON:
+ case BSON:
+ Preconditions.checkState(
+ primitiveType == PrimitiveTypeName.BINARY,
+ originalType.toString() + " can only annotate binary fields");
+ break;
+ case DECIMAL:
+ Preconditions.checkState(
+ (primitiveType == PrimitiveTypeName.INT32) ||
+ (primitiveType == PrimitiveTypeName.INT64) ||
+ (primitiveType == PrimitiveTypeName.BINARY) ||
+ (primitiveType == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY),
+ "DECIMAL can only annotate INT32, INT64, BINARY, and FIXED"
+ );
+ if (primitiveType == PrimitiveTypeName.INT32) { // the precision has to fit in the storage type
+ Preconditions.checkState(
+ meta.getPrecision() <= MAX_PRECISION_INT32,
+ "INT32 cannot store " + meta.getPrecision() + " digits " +
+ "(max " + MAX_PRECISION_INT32 + ")");
+ } else if (primitiveType == PrimitiveTypeName.INT64) {
+ Preconditions.checkState(
+ meta.getPrecision() <= MAX_PRECISION_INT64,
+ "INT64 cannot store " + meta.getPrecision() + " digits " +
+ "(max " + MAX_PRECISION_INT64 + ")");
+ } else if (primitiveType == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
+ Preconditions.checkState(
+ meta.getPrecision() <= maxPrecision(length),
+ "FIXED(" + length + ") cannot store " + meta.getPrecision() +
+ " digits (max " + maxPrecision(length) + ")");
+ } // no precision bound is enforced for BINARY
+ break;
+ case DATE:
+ case TIME_MILLIS:
+ case UINT_8:
+ case UINT_16:
+ case UINT_32:
+ case INT_8:
+ case INT_16:
+ case INT_32:
+ Preconditions.checkState(primitiveType == PrimitiveTypeName.INT32,
+ originalType.toString() + " can only annotate INT32");
+ break;
+ case TIMESTAMP_MILLIS:
+ case UINT_64:
+ case INT_64:
+ Preconditions.checkState(primitiveType == PrimitiveTypeName.INT64,
+ originalType.toString() + " can only annotate INT64");
+ break;
+ case INTERVAL:
+ Preconditions.checkState(
+ (primitiveType == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) &&
+ (length == 12),
+ "INTERVAL can only annotate FIXED_LEN_BYTE_ARRAY(12)");
+ break;
+ case ENUM:
+ Preconditions.checkState(
+ primitiveType == PrimitiveTypeName.BINARY,
+ "ENUM can only annotate binary fields");
+ break;
+ default: // any annotation not listed above is rejected for primitives
+ throw new IllegalStateException(originalType + " can not be applied to a primitive type");
+ }
+ }
+
+ return new PrimitiveType(repetition, primitiveType, length, name, originalType, meta, id);
+ }
+
+ private static long maxPrecision(int numBytes) {
+ return Math.round( // convert double to long
+ Math.floor(Math.log10( // number of base-10 digits
+ Math.pow(2, 8 * numBytes - 1) - 1) // max value stored in numBytes
+ )
+ );
+ }
+
+ protected DecimalMetadata decimalMetadata() {
+   if (OriginalType.DECIMAL != originalType) {
+     return null; // metadata is only produced for the DECIMAL annotation
+   }
+
+   Preconditions.checkArgument(precision > 0,
+       "Invalid DECIMAL precision: " + precision);
+   Preconditions.checkArgument(scale >= 0,
+       "Invalid DECIMAL scale: " + scale);
+   Preconditions.checkArgument(scale <= precision,
+       "Invalid DECIMAL scale: cannot be greater than precision");
+   return new DecimalMetadata(precision, scale);
+ }
+ }
+
+ /**
+ * A builder for {@link GroupType} objects.
+ *
+ * @param <P> The type that this builder will return from
+ * {@link #named(String)} when the type is built.
+ */
+ public static class GroupBuilder<P> extends Builder<GroupBuilder<P>, P> {
+ protected final List<Type> fields;
+
+ private GroupBuilder(P parent) {
+ super(parent);
+ this.fields = new ArrayList<Type>();
+ }
+
+ private GroupBuilder(Class<P> returnType) {
+ super(returnType);
+ this.fields = new ArrayList<Type>();
+ }
+
+ @Override
+ protected GroupBuilder<P> self() {
+ return this;
+ }
+
+ public PrimitiveBuilder<GroupBuilder<P>> primitive(
+ PrimitiveTypeName type, Type.Repetition repetition) {
+ return new PrimitiveBuilder<GroupBuilder<P>>(this, type)
+ .repetition(repetition);
+ }
+
+ /**
+ * Returns a {@link PrimitiveBuilder} for the required primitive type
+ * {@code type}.
+ *
+ * @param type a {@link PrimitiveTypeName}
+ * @return a primitive builder for {@code type} that will return this
+ * builder for additional fields.
+ */
+ public PrimitiveBuilder<GroupBuilder<P>> required(
+ PrimitiveTypeName type) {
+ return new PrimitiveBuilder<GroupBuilder<P>>(this, type)
+ .repetition(Type.Repetition.REQUIRED);
+ }
+
+ /**
+ * Returns a {@link PrimitiveBuilder} for the optional primitive type
+ * {@code type}.
+ *
+ * @param type a {@link PrimitiveTypeName}
+ * @return a primitive builder for {@code type} that will return this
+ * builder for additional fields.
+ */
+ public PrimitiveBuilder<GroupBuilder<P>> optional(
+ PrimitiveTypeName type) {
+ return new PrimitiveBuilder<GroupBuilder<P>>(this, type)
+ .repetition(Type.Repetition.OPTIONAL);
+ }
+
+ /**
+ * Returns a {@link PrimitiveBuilder} for the repeated primitive type
+ * {@code type}.
+ *
+ * @param type a {@link PrimitiveTypeName}
+ * @return a primitive builder for {@code type} that will return this
+ * builder for additional fields.
+ */
+ public PrimitiveBuilder<GroupBuilder<P>> repeated(
+ PrimitiveTypeName type) {
+ return new PrimitiveBuilder<GroupBuilder<P>>(this, type)
+ .repetition(Type.Repetition.REPEATED);
+ }
+
+ public GroupBuilder<GroupBuilder<P>> group(Type.Repetition repetition) {
+ return new GroupBuilder<GroupBuilder<P>>(this)
+ .repetition(repetition);
+ }
+
+ /**
+ * Returns a {@link GroupBuilder} to build a required sub-group.
+ *
+ * @return a group builder that will return this builder for additional
+ * fields.
+ */
+ public GroupBuilder<GroupBuilder<P>> requiredGroup() {
+ return new GroupBuilder<GroupBuilder<P>>(this)
+ .repetition(Type.Repetition.REQUIRED);
+ }
+
+ /**
+ * Returns a {@link GroupBuilder} to build an optional sub-group.
+ *
+ * @return a group builder that will return this builder for additional
+ * fields.
+ */
+ public GroupBuilder<GroupBuilder<P>> optionalGroup() {
+ return new GroupBuilder<GroupBuilder<P>>(this)
+ .repetition(Type.Repetition.OPTIONAL);
+ }
+
+ /**
+ * Returns a {@link GroupBuilder} to build a repeated sub-group.
+ *
+ * @return a group builder that will return this builder for additional
+ * fields.
+ */
+ public GroupBuilder<GroupBuilder<P>> repeatedGroup() {
+ return new GroupBuilder<GroupBuilder<P>>(this)
+ .repetition(Type.Repetition.REPEATED);
+ }
+
+ /**
+ * Adds {@code type} as a sub-field to the group configured by this builder.
+ *
+ * @return this builder for additional fields.
+ */
+ public GroupBuilder<P> addField(Type type) {
+ fields.add(type);
+ return this;
+ }
+
+ /**
+ * Appends every element of {@code types}, in order, to the sub-fields of the group being built.
+ * @param types the types to add as sub-fields
+ * @return this builder for additional fields.
+ */
+ public GroupBuilder<P> addFields(Type... types) {
+   for (int i = 0; i < types.length; i++) {
+     fields.add(types[i]);
+   }
+   return this;
+ }
+
+ @Override
+ protected GroupType build(String name) {
+ Preconditions.checkState(!fields.isEmpty(),
+ "Cannot build an empty group");
+ return new GroupType(repetition, name, originalType, fields, id);
+ }
+ }
+
+ public static class MessageTypeBuilder extends GroupBuilder<MessageType> {
+ private MessageTypeBuilder() {
+ super(MessageType.class);
+ repetition(Type.Repetition.REQUIRED);
+ }
+
+ /**
+ * Builds and returns the {@link MessageType} configured by this builder.
+ * <p>
+ * <em>Note:</em> All primitive types and sub-groups should be added before
+ * calling this method.
+ *
+ * @param name a name for the constructed type
+ * @return the final {@code MessageType} configured by this builder.
+ */
+ @Override
+ public MessageType named(String name) {
+ Preconditions.checkNotNull(name, "Name is required");
+ return new MessageType(name, fields);
+ }
+ }
+
+ /**
+ * Returns a builder to construct a {@link MessageType}.
+ *
+ * @return a {@link MessageTypeBuilder}
+ */
+ public static MessageTypeBuilder buildMessage() {
+ return new MessageTypeBuilder();
+ }
+
+ public static GroupBuilder<GroupType> buildGroup(
+ Type.Repetition repetition) {
+ return new GroupBuilder<GroupType>(GroupType.class).repetition(repetition);
+ }
+
+ /**
+ * Returns a builder to construct a required {@link GroupType}.
+ *
+ * @return a {@link GroupBuilder}
+ */
+ public static GroupBuilder<GroupType> requiredGroup() {
+ return new GroupBuilder<GroupType>(GroupType.class)
+ .repetition(Type.Repetition.REQUIRED);
+ }
+
+ /**
+ * Returns a builder to construct an optional {@link GroupType}.
+ *
+ * @return a {@link GroupBuilder}
+ */
+ public static GroupBuilder<GroupType> optionalGroup() {
+ return new GroupBuilder<GroupType>(GroupType.class)
+ .repetition(Type.Repetition.OPTIONAL);
+ }
+
+ /**
+ * Returns a builder to construct a repeated {@link GroupType}.
+ *
+ * @return a {@link GroupBuilder}
+ */
+ public static GroupBuilder<GroupType> repeatedGroup() {
+ return new GroupBuilder<GroupType>(GroupType.class)
+ .repetition(Type.Repetition.REPEATED);
+ }
+
+ public static PrimitiveBuilder<PrimitiveType> primitive(
+ PrimitiveTypeName type, Type.Repetition repetition) {
+ return new PrimitiveBuilder<PrimitiveType>(PrimitiveType.class, type)
+ .repetition(repetition);
+ }
+
+ /**
+ * Returns a builder to construct a required {@link PrimitiveType}.
+ *
+ * @param type a {@link PrimitiveTypeName} for the constructed type
+ * @return a {@link PrimitiveBuilder}
+ */
+ public static PrimitiveBuilder<PrimitiveType> required(
+ PrimitiveTypeName type) {
+ return new PrimitiveBuilder<PrimitiveType>(PrimitiveType.class, type)
+ .repetition(Type.Repetition.REQUIRED);
+ }
+
+ /**
+ * Returns a builder to construct an optional {@link PrimitiveType}.
+ *
+ * @param type a {@link PrimitiveTypeName} for the constructed type
+ * @return a {@link PrimitiveBuilder}
+ */
+ public static PrimitiveBuilder<PrimitiveType> optional(
+ PrimitiveTypeName type) {
+ return new PrimitiveBuilder<PrimitiveType>(PrimitiveType.class, type)
+ .repetition(Type.Repetition.OPTIONAL);
+ }
+
+ /**
+ * Returns a builder to construct a repeated {@link PrimitiveType}.
+ *
+ * @param type a {@link PrimitiveTypeName} for the constructed type
+ * @return a {@link PrimitiveBuilder}
+ */
+ public static PrimitiveBuilder<PrimitiveType> repeated(
+ PrimitiveTypeName type) {
+ return new PrimitiveBuilder<PrimitiveType>(PrimitiveType.class, type)
+ .repetition(Type.Repetition.REPEATED);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/ColumnDescriptor.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/ColumnDescriptor.java b/parquet-column/src/main/java/parquet/column/ColumnDescriptor.java
deleted file mode 100644
index acbb248..0000000
--- a/parquet-column/src/main/java/parquet/column/ColumnDescriptor.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column;
-
-import java.util.Arrays;
-
-import parquet.schema.PrimitiveType.PrimitiveTypeName;
-
-/**
- * Describes a column's type as well as its position in its containing schema.
- *
- * @author Julien Le Dem
- *
- */
-public class ColumnDescriptor implements Comparable<ColumnDescriptor> {
-
- private final String[] path;
- private final PrimitiveTypeName type;
- private final int typeLength;
- private final int maxRep;
- private final int maxDef;
-
- /**
- *
- * @param path the path to the leaf field in the schema
- * @param type the type of the field
- * @param maxRep the maximum repetition level for that path
- * @param maxDef the maximum definition level for that path
- */
- public ColumnDescriptor(String[] path, PrimitiveTypeName type, int maxRep,
- int maxDef) {
- this(path, type, 0, maxRep, maxDef);
- }
-
- /**
- *
- * @param path the path to the leaf field in the schema
- * @param type the type of the field
- * @param maxRep the maximum repetition level for that path
- * @param maxDef the maximum definition level for that path
- */
- public ColumnDescriptor(String[] path, PrimitiveTypeName type,
- int typeLength, int maxRep, int maxDef) {
- super();
- this.path = path;
- this.type = type;
- this.typeLength = typeLength;
- this.maxRep = maxRep;
- this.maxDef = maxDef;
- }
-
- /**
- * @return the path to the leaf field in the schema
- */
- public String[] getPath() {
- return path;
- }
-
- /**
- * @return the maximum repetition level for that path
- */
- public int getMaxRepetitionLevel() {
- return maxRep;
- }
-
- /**
- * @return the maximum definition level for that path
- */
- public int getMaxDefinitionLevel() {
- return maxDef;
- }
-
- /**
- * @return the type of that column
- */
- public PrimitiveTypeName getType() {
- return type;
- }
-
- /**
- * @return the size of the type
- **/
- public int getTypeLength() {
- return typeLength;
- }
-
- @Override
- public int hashCode() {
- return Arrays.hashCode(path);
- }
-
- @Override
- public boolean equals(Object obj) {
- return Arrays.equals(path, ((ColumnDescriptor)obj).path);
- }
-
- @Override
- public int compareTo(ColumnDescriptor o) {
- // TODO(julien): this will fail if o.path.length < this.path.length
- for (int i = 0; i < path.length; i++) {
- int compareTo = path[i].compareTo(o.path[i]);
- if (compareTo != 0) {
- return compareTo;
- }
- }
- return 0;
- }
-
- @Override
- public String toString() {
- return Arrays.toString(path) + " " + type;
- }
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/ColumnReadStore.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/ColumnReadStore.java b/parquet-column/src/main/java/parquet/column/ColumnReadStore.java
deleted file mode 100644
index dc8c3a8..0000000
--- a/parquet-column/src/main/java/parquet/column/ColumnReadStore.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column;
-
-/**
- * Container which can produce a ColumnReader for any given column in a schema.
- *
- * @author Julien Le Dem
- */
-public interface ColumnReadStore {
-
- /**
- * @param path the column to read
- * @return the column reader for that descriptor
- */
- abstract public ColumnReader getColumnReader(ColumnDescriptor path);
-
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/ColumnReader.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/ColumnReader.java b/parquet-column/src/main/java/parquet/column/ColumnReader.java
deleted file mode 100644
index bc13316..0000000
--- a/parquet-column/src/main/java/parquet/column/ColumnReader.java
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column;
-
-import parquet.io.api.Binary;
-
-/**
- * Reader for (repetition level, definition level, values) triplets.
- * At any given point in time, a ColumnReader points to a single (r, d, v) triplet.
- * In order to move to the next triplet, call {@link #consume()}.
- *
- * Depending on the type and the encoding of the column only a subset of the get* methods are implemented.
- * Dictionary specific methods enable the upper layers to read the dictionary IDs without decoding the data.
- * In particular the Converter will decode the strings in the dictionary only once and iterate on the
- * dictionary IDs instead of the values.
- *
- * Each iteration looks at the current definition level and value as well as the next
- * repetition level: <ul>
- * <li> The current definition level defines if the value is null.</li>
- * <li> If the value is defined we can read it with the correct get*() method.</li>
- * <li> Looking ahead to the next repetition level determines which column to read next in the FSA.</li>
- * </ul>
- * @author Julien Le Dem
- */
-public interface ColumnReader {
-
- /**
- * @return the totalCount of values to be consumed
- */
- long getTotalValueCount();
-
- /**
- * Consume the current triplet, moving to the next value.
- */
- void consume();
-
- /**
- * must return 0 when isFullyConsumed() == true
- * @return the repetition level for the current value
- */
- int getCurrentRepetitionLevel();
-
- /**
- * @return the definition level for the current value
- */
- int getCurrentDefinitionLevel();
-
- /**
- * writes the current value to the converter
- */
- void writeCurrentValueToConverter();
-
- /**
- * Skip the current value
- */
- void skip();
-
- /**
- * available when the underlying encoding is dictionary based
- * @return the dictionary id for the current value
- */
- int getCurrentValueDictionaryID();
-
- /**
- * @return the current value
- */
- int getInteger();
-
- /**
- * @return the current value
- */
- boolean getBoolean();
-
- /**
- * @return the current value
- */
- long getLong();
-
- /**
- * @return the current value
- */
- Binary getBinary();
-
- /**
- * @return the current value
- */
- float getFloat();
-
- /**
- * @return the current value
- */
- double getDouble();
-
- /**
- * @return Descriptor of the column.
- */
- ColumnDescriptor getDescriptor();
-
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/ColumnWriteStore.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/ColumnWriteStore.java b/parquet-column/src/main/java/parquet/column/ColumnWriteStore.java
deleted file mode 100644
index 98058ae..0000000
--- a/parquet-column/src/main/java/parquet/column/ColumnWriteStore.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column;
-
-/**
- * Container which can construct writers for multiple columns to be stored
- * together.
- *
- * @author Julien Le Dem
- */
-public interface ColumnWriteStore {
- /**
- * @param path the column for which to create a writer
- * @return the column writer for the given column
- */
- abstract public ColumnWriter getColumnWriter(ColumnDescriptor path);
-
- /**
- * called when we are done writing, to flush to the underlying storage
- */
- abstract public void flush();
-
- /**
- * called to notify of record boundaries
- */
- abstract public void endRecord();
-
- /**
- * used for information
- * @return approximate size used in memory
- */
- abstract public long getAllocatedSize();
-
- /**
- * used to flush row groups to disk
- * @return approximate size of the buffered encoded binary data
- */
- abstract public long getBufferedSize();
-
- /**
- * used for debugging purposes
- * @return a formatted string representing memory usage per column
- */
- abstract public String memUsageString();
-}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/ColumnWriter.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/ColumnWriter.java b/parquet-column/src/main/java/parquet/column/ColumnWriter.java
deleted file mode 100644
index e639229..0000000
--- a/parquet-column/src/main/java/parquet/column/ColumnWriter.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column;
-
-import parquet.io.api.Binary;
-
-/**
- * writer for (repetition level, definition level, values) triplets
- *
- * @author Julien Le Dem
- *
- */
-public interface ColumnWriter {
-
- /**
- * writes the current value
- * @param value
- * @param repetitionLevel
- * @param definitionLevel
- */
- void write(int value, int repetitionLevel, int definitionLevel);
-
- /**
- * writes the current value
- * @param value
- * @param repetitionLevel
- * @param definitionLevel
- */
- void write(long value, int repetitionLevel, int definitionLevel);
-
- /**
- * writes the current value
- * @param value
- * @param repetitionLevel
- * @param definitionLevel
- */
- void write(boolean value, int repetitionLevel, int definitionLevel);
-
- /**
- * writes the current value
- * @param value
- * @param repetitionLevel
- * @param definitionLevel
- */
- void write(Binary value, int repetitionLevel, int definitionLevel);
-
- /**
- * writes the current value
- * @param value
- * @param repetitionLevel
- * @param definitionLevel
- */
- void write(float value, int repetitionLevel, int definitionLevel);
-
- /**
- * writes the current value
- * @param value
- * @param repetitionLevel
- * @param definitionLevel
- */
- void write(double value, int repetitionLevel, int definitionLevel);
-
- /**
- * writes the current null value
- * @param repetitionLevel
- * @param definitionLevel
- */
- void writeNull(int repetitionLevel, int definitionLevel);
-
-}
-
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/b10870e4/parquet-column/src/main/java/parquet/column/Dictionary.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/column/Dictionary.java b/parquet-column/src/main/java/parquet/column/Dictionary.java
deleted file mode 100644
index b03f371..0000000
--- a/parquet-column/src/main/java/parquet/column/Dictionary.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package parquet.column;
-
-import parquet.io.api.Binary;
-
-/**
- * a dictionary to decode dictionary based encodings
- *
- * @author Julien Le Dem
- *
- */
-public abstract class Dictionary {
-
- private final Encoding encoding;
-
- public Dictionary(Encoding encoding) {
- this.encoding = encoding;
- }
-
- public Encoding getEncoding() {
- return encoding;
- }
-
- public abstract int getMaxId();
-
- public Binary decodeToBinary(int id) {
- throw new UnsupportedOperationException(this.getClass().getName());
- }
-
- public int decodeToInt(int id) {
- throw new UnsupportedOperationException(this.getClass().getName());
- }
-
- public long decodeToLong(int id) {
- throw new UnsupportedOperationException(this.getClass().getName());
- }
-
- public float decodeToFloat(int id) {
- throw new UnsupportedOperationException(this.getClass().getName());
- }
-
- public double decodeToDouble(int id) {
- throw new UnsupportedOperationException(this.getClass().getName());
- }
-
- public boolean decodeToBoolean(int id) {
- throw new UnsupportedOperationException(this.getClass().getName());
- }
-}