You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ti...@apache.org on 2014/09/25 20:26:05 UTC
git commit: PARQUET-90: integrate field ids in schema
Repository: incubator-parquet-mr
Updated Branches:
refs/heads/master 0c4f13a84 -> 3a082e8e3
PARQUET-90: integrate field ids in schema
This integrates support for field is that was introduced in Parquet format.
Thrift and Protobufs ids will now be saved in the Parquet schema.
Author: julien <ju...@twitter.com>
Closes #56 from julienledem/field_ids and squashes the following commits:
62c2809 [julien] remove withOriginalType; use Typles builder more
8ff0034 [julien] review feedback
084c8be [julien] binary compat
85d785c [julien] add proto id in schema; fix schema parsing for ids
d4be488 [julien] integrate field ids in schema
Project: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/commit/3a082e8e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/tree/3a082e8e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/diff/3a082e8e
Branch: refs/heads/master
Commit: 3a082e8e390898646c094d20f4ec1eeba45b79ac
Parents: 0c4f13a
Author: julien <ju...@twitter.com>
Authored: Thu Sep 25 11:25:53 2014 -0700
Committer: Tianshuo Deng <td...@twitter.com>
Committed: Thu Sep 25 11:25:53 2014 -0700
----------------------------------------------------------------------
.../src/main/java/parquet/schema/GroupType.java | 82 ++++++--
.../main/java/parquet/schema/MessageType.java | 10 +-
.../java/parquet/schema/MessageTypeParser.java | 21 +-
.../main/java/parquet/schema/PrimitiveType.java | 77 ++++---
.../src/main/java/parquet/schema/Type.java | 102 +++++++++-
.../src/main/java/parquet/schema/Types.java | 31 ++-
.../java/parquet/parser/TestParquetParser.java | 200 +++++++++++--------
.../java/parquet/schema/TestMessageType.java | 14 +-
.../java/parquet/schema/TestTypeBuilders.java | 16 +-
.../converter/ParquetMetadataConverter.java | 3 +
.../java/parquet/pig/PigSchemaConverter.java | 64 +++---
.../parquet/proto/ProtoSchemaConverter.java | 106 +++++-----
.../parquet/proto/ProtoSchemaConverterTest.java | 58 +++---
.../thrift/ThriftSchemaConvertVisitor.java | 96 +++++----
.../parquet/thrift/projection/FieldsPath.java | 5 +-
.../thrift/TestThriftSchemaConverter.java | 113 ++++++-----
16 files changed, 611 insertions(+), 387 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/main/java/parquet/schema/GroupType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/schema/GroupType.java b/parquet-column/src/main/java/parquet/schema/GroupType.java
index fefbcb2..3d16300 100644
--- a/parquet-column/src/main/java/parquet/schema/GroupType.java
+++ b/parquet-column/src/main/java/parquet/schema/GroupType.java
@@ -15,6 +15,8 @@
*/
package parquet.schema;
+import static java.util.Arrays.asList;
+
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -40,7 +42,7 @@ public class GroupType extends Type {
* @param fields the contained fields
*/
public GroupType(Repetition repetition, String name, List<Type> fields) {
- this(repetition, name, null, fields);
+ this(repetition, name, null, fields, null);
}
/**
@@ -49,7 +51,7 @@ public class GroupType extends Type {
* @param fields the contained fields
*/
public GroupType(Repetition repetition, String name, Type... fields) {
- this(repetition, name, null, fields);
+ this(repetition, name, Arrays.asList(fields));
}
/**
@@ -58,6 +60,7 @@ public class GroupType extends Type {
* @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
* @param fields the contained fields
*/
+ @Deprecated
public GroupType(Repetition repetition, String name, OriginalType originalType, Type... fields) {
this(repetition, name, originalType, Arrays.asList(fields));
}
@@ -68,8 +71,20 @@ public class GroupType extends Type {
* @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
* @param fields the contained fields
*/
+ @Deprecated
public GroupType(Repetition repetition, String name, OriginalType originalType, List<Type> fields) {
- super(name, repetition, originalType);
+ this(repetition, name, originalType, fields, null);
+ }
+
+ /**
+ * @param repetition OPTIONAL, REPEATED, REQUIRED
+ * @param name the name of the field
+ * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
+ * @param fields the contained fields
+ * @param id the id of the field
+ */
+ GroupType(Repetition repetition, String name, OriginalType originalType, List<Type> fields, ID id) {
+ super(name, repetition, originalType, id);
this.fields = fields;
this.indexByName = new HashMap<String, Integer>();
for (int i = 0; i < fields.size(); i++) {
@@ -78,6 +93,31 @@ public class GroupType extends Type {
}
/**
+ * @param id the field id
+ * @return a new GroupType with the same fields and a new id
+ */
+ @Override
+ public GroupType withId(int id) {
+ return new GroupType(getRepetition(), getName(), getOriginalType(), fields, new ID(id));
+ }
+
+ /**
+ * @param newFields
+ * @return a group with the same attributes and new fields.
+ */
+ public GroupType withNewFields(List<Type> newFields) {
+ return new GroupType(getRepetition(), getName(), getOriginalType(), newFields, getId());
+ }
+
+ /**
+ * @param newFields
+ * @return a group with the same attributes and new fields.
+ */
+ public GroupType withNewFields(Type... newFields) {
+ return withNewFields(asList(newFields));
+ }
+
+ /**
* returns the name of the corresponding field
* @param index the index of the desired field in this type
* @return the name of the field at this index
@@ -169,6 +209,7 @@ public class GroupType extends Type {
.append(" group ")
.append(getName())
.append(getOriginalType() == null ? "" : " (" + getOriginalType() +")")
+ .append(getId() == null ? "" : " = " + getId())
.append(" {\n");
membersDisplayString(sb, indent + " ");
sb.append(indent)
@@ -183,32 +224,33 @@ public class GroupType extends Type {
visitor.visit(this);
}
+ @Override @Deprecated
+ protected int typeHashCode() {
+ return hashCode();
+ }
+
+ @Override @Deprecated
+ protected boolean typeEquals(Type other) {
+ return equals(other);
+ }
+
/**
* {@inheritDoc}
*/
@Override
- protected int typeHashCode() {
- int c = 17;
- c += 31 * getRepetition().hashCode();
- c += 31 * getName().hashCode();
- c += 31 * getFields().hashCode();
- return c;
+ public int hashCode() {
+ return super.hashCode() * 31 + getFields().hashCode();
}
/**
* {@inheritDoc}
*/
@Override
- protected boolean typeEquals(Type other) {
- Type otherType = (Type) other;
- if (otherType.isPrimitive()) {
- return false;
- } else {
- GroupType groupType = otherType.asGroupType();
- return getRepetition() == groupType.getRepetition() &&
- getName().equals(groupType.getName()) &&
- getFields().equals(groupType.getFields());
- }
+ protected boolean equals(Type otherType) {
+ return
+ !otherType.isPrimitive()
+ && super.equals(otherType)
+ && getFields().equals(otherType.asGroupType().getFields());
}
@Override
@@ -312,7 +354,7 @@ public class GroupType extends Type {
List<Type> mergeFields(GroupType toMerge) {
return mergeFields(toMerge, true);
}
-
+
/**
* produces the list of fields resulting from merging toMerge into the fields of this
* @param toMerge the group containing the fields to merge
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/main/java/parquet/schema/MessageType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/schema/MessageType.java b/parquet-column/src/main/java/parquet/schema/MessageType.java
index 9c05747..6815685 100644
--- a/parquet-column/src/main/java/parquet/schema/MessageType.java
+++ b/parquet-column/src/main/java/parquet/schema/MessageType.java
@@ -20,8 +20,6 @@ import java.util.List;
import parquet.column.ColumnDescriptor;
import parquet.io.InvalidRecordException;
-import parquet.schema.PrimitiveType.PrimitiveTypeName;
-
/**
* The root of a schema
@@ -109,10 +107,10 @@ public final class MessageType extends GroupType {
// TODO: optimize this
PrimitiveType primitiveType = getType(path).asPrimitiveType();
columns.add(new ColumnDescriptor(
- path,
- primitiveType.getPrimitiveTypeName(),
+ path,
+ primitiveType.getPrimitiveTypeName(),
primitiveType.getTypeLength(),
- getMaxRepetitionLevel(path),
+ getMaxRepetitionLevel(path),
getMaxDefinitionLevel(path)));
}
return columns;
@@ -139,7 +137,7 @@ public final class MessageType extends GroupType {
public MessageType union(MessageType toMerge) {
return union(toMerge, true);
}
-
+
public MessageType union(MessageType toMerge, boolean strict) {
return new MessageType(this.getName(), mergeFields(toMerge, strict));
}
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/main/java/parquet/schema/MessageTypeParser.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/schema/MessageTypeParser.java b/parquet-column/src/main/java/parquet/schema/MessageTypeParser.java
index b1dca02..01dfe17 100644
--- a/parquet-column/src/main/java/parquet/schema/MessageTypeParser.java
+++ b/parquet-column/src/main/java/parquet/schema/MessageTypeParser.java
@@ -21,6 +21,8 @@ import java.util.StringTokenizer;
import parquet.Log;
import parquet.schema.PrimitiveType.PrimitiveTypeName;
import parquet.schema.Type.Repetition;
+import parquet.schema.Types.GroupBuilder;
+import parquet.schema.Types.PrimitiveBuilder;
/**
* Parses a schema from a textual format similar to that described in the Dremel paper.
@@ -38,7 +40,7 @@ public class MessageTypeParser {
private StringBuffer currentLine = new StringBuffer();
public Tokenizer(String schemaString, String string) {
- st = new StringTokenizer(schemaString, " ,;{}()\n\t", true);
+ st = new StringTokenizer(schemaString, " ,;{}()\n\t=", true);
}
public String nextToken() {
@@ -107,8 +109,8 @@ public class MessageTypeParser {
}
}
- private static void addGroupType(String t, Tokenizer st, Repetition r, Types.GroupBuilder builder) {
- Types.GroupBuilder childBuilder = builder.group(r);
+ private static void addGroupType(String t, Tokenizer st, Repetition r, GroupBuilder<?> builder) {
+ GroupBuilder<?> childBuilder = builder.group(r);
String name = st.nextToken();
// Read annotation, if any.
@@ -120,7 +122,10 @@ public class MessageTypeParser {
check(st.nextToken(), ")", "original type ended by )", st);
t = st.nextToken();
}
-
+ if (t.equals("=")) {
+ childBuilder.id(Integer.parseInt(st.nextToken()));
+ t = st.nextToken();
+ }
try {
addGroupTypeFields(t, st, childBuilder);
} catch (IllegalArgumentException e) {
@@ -130,8 +135,8 @@ public class MessageTypeParser {
childBuilder.named(name);
}
- private static void addPrimitiveType(String t, Tokenizer st, PrimitiveTypeName type, Repetition r, Types.GroupBuilder builder) {
- Types.PrimitiveBuilder childBuilder = builder.primitive(type, r);
+ private static void addPrimitiveType(String t, Tokenizer st, PrimitiveTypeName type, Repetition r, Types.GroupBuilder<?> builder) {
+ PrimitiveBuilder<?> childBuilder = builder.primitive(type, r);
if (type == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
t = st.nextToken();
@@ -170,6 +175,10 @@ public class MessageTypeParser {
check(t, ")", "original type ended by )", st);
t = st.nextToken();
}
+ if (t.equals("=")) {
+ childBuilder.id(Integer.parseInt(st.nextToken()));
+ t = st.nextToken();
+ }
check(t, ";", "field ended by ';'", st);
try {
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/main/java/parquet/schema/PrimitiveType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/parquet/schema/PrimitiveType.java
index 3a734a9..88a402f 100644
--- a/parquet-column/src/main/java/parquet/schema/PrimitiveType.java
+++ b/parquet-column/src/main/java/parquet/schema/PrimitiveType.java
@@ -276,15 +276,15 @@ public final class PrimitiveType extends Type {
private final PrimitiveTypeName primitive;
private final int length;
private final DecimalMetadata decimalMeta;
-
+
/**
* @param repetition OPTIONAL, REPEATED, REQUIRED
* @param primitive STRING, INT64, ...
* @param name the name of the type
*/
- public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
+ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
String name) {
- this(repetition, primitive, 0, name, null, null);
+ this(repetition, primitive, 0, name, null, null, null);
}
/**
@@ -294,7 +294,7 @@ public final class PrimitiveType extends Type {
* @param name the name of the type
*/
public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, int length, String name) {
- this(repetition, primitive, length, name, null, null);
+ this(repetition, primitive, length, name, null, null, null);
}
/**
@@ -303,9 +303,9 @@ public final class PrimitiveType extends Type {
* @param name the name of the type
* @param originalType (optional) the original type to help with cross schema convertion (LIST, MAP, ...)
*/
- public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
+ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
String name, OriginalType originalType) {
- this(repetition, primitive, 0, name, originalType, null);
+ this(repetition, primitive, 0, name, originalType, null, null);
}
/**
@@ -315,9 +315,10 @@ public final class PrimitiveType extends Type {
* @param length the length if the type is FIXED_LEN_BYTE_ARRAY, 0 otherwise (XXX)
* @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
*/
+ @Deprecated
public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
int length, String name, OriginalType originalType) {
- this(repetition, primitive, length, name, originalType, null);
+ this(repetition, primitive, length, name, originalType, null, null);
}
/**
@@ -327,17 +328,28 @@ public final class PrimitiveType extends Type {
* @param length the length if the type is FIXED_LEN_BYTE_ARRAY, 0 otherwise
* @param originalType (optional) the original type (MAP, DECIMAL, UTF8, ...)
* @param decimalMeta (optional) metadata about the decimal type
+ * @param id the id of the field
*/
- PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
- int length, String name, OriginalType originalType,
- DecimalMetadata decimalMeta) {
- super(name, repetition, originalType);
+ PrimitiveType(
+ Repetition repetition, PrimitiveTypeName primitive,
+ int length, String name, OriginalType originalType,
+ DecimalMetadata decimalMeta, ID id) {
+ super(name, repetition, originalType, id);
this.primitive = primitive;
this.length = length;
this.decimalMeta = decimalMeta;
}
/**
+ * @param id the field id
+ * @return a new PrimitiveType with the same fields and a new id
+ */
+ @Override
+ public PrimitiveType withId(int id) {
+ return new PrimitiveType(getRepetition(), primitive, length, getName(), getOriginalType(), decimalMeta, new ID(id));
+ }
+
+ /**
* @return the primitive type
*/
public PrimitiveTypeName getPrimitiveTypeName() {
@@ -399,36 +411,47 @@ public final class PrimitiveType extends Type {
}
sb.append(")");
}
+ if (getId() != null) {
+ sb.append(" = ").append(getId());
+ }
+ }
+
+ @Override @Deprecated
+ protected int typeHashCode() {
+ return hashCode();
+ }
+
+ @Override @Deprecated
+ protected boolean typeEquals(Type other) {
+ return equals(other);
}
/**
* {@inheritDoc}
*/
@Override
- protected boolean typeEquals(Type other) {
- if (other.isPrimitive()) {
- PrimitiveType primitiveType = other.asPrimitiveType();
- if ((getPrimitiveTypeName() == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) &&
- (getTypeLength() != primitiveType.getTypeLength())) {
- return false;
- }
- return getRepetition() == primitiveType.getRepetition() &&
- getPrimitiveTypeName().equals(primitiveType.getPrimitiveTypeName()) &&
- getName().equals(primitiveType.getName());
- } else {
+ protected boolean equals(Type other) {
+ if (!other.isPrimitive()) {
return false;
}
+ PrimitiveType otherPrimitive = other.asPrimitiveType();
+ return super.equals(other)
+ && primitive == otherPrimitive.getPrimitiveTypeName()
+ && length == otherPrimitive.length
+ && eqOrBothNull(decimalMeta, otherPrimitive.decimalMeta);
}
/**
* {@inheritDoc}
*/
@Override
- protected int typeHashCode() {
- int hash = 17;
- hash += 31 * getRepetition().hashCode();
- hash += 31 * getPrimitiveTypeName().hashCode();
- hash += 31 * getName().hashCode();
+ public int hashCode() {
+ int hash = super.hashCode();
+ hash = hash * 31 + primitive.hashCode();
+ hash = hash * 31 + length;
+ if (decimalMeta != null) {
+ hash = hash * 31 + decimalMeta.hashCode();
+ }
return hash;
}
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/main/java/parquet/schema/Type.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/schema/Type.java b/parquet-column/src/main/java/parquet/schema/Type.java
index 2e02387..32128e3 100644
--- a/parquet-column/src/main/java/parquet/schema/Type.java
+++ b/parquet-column/src/main/java/parquet/schema/Type.java
@@ -15,6 +15,8 @@
*/
package parquet.schema;
+import static parquet.Preconditions.checkNotNull;
+
import java.util.List;
import parquet.io.InvalidRecordException;
@@ -28,6 +30,39 @@ import parquet.io.InvalidRecordException;
abstract public class Type {
/**
+ * represents a field ID
+ *
+ * @author Julien Le Dem
+ *
+ */
+ public static final class ID {
+ private final int id;
+
+ public ID(int id) {
+ this.id = id;
+ }
+
+ public int intValue() {
+ return id;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ return (obj instanceof ID) && ((ID)obj).id == id;
+ }
+
+ @Override
+ public int hashCode() {
+ return id;
+ }
+
+ @Override
+ public String toString() {
+ return String.valueOf(id);
+ }
+ }
+
+ /**
* Constraint on the repetition of a field
*
* @author Julien Le Dem
@@ -73,13 +108,15 @@ abstract public class Type {
private final String name;
private final Repetition repetition;
private final OriginalType originalType;
+ private final ID id;
/**
* @param name the name of the type
* @param repetition OPTIONAL, REPEATED, REQUIRED
*/
+ @Deprecated
public Type(String name, Repetition repetition) {
- this(name, repetition, null);
+ this(name, repetition, null, null);
}
/**
@@ -87,14 +124,32 @@ abstract public class Type {
* @param repetition OPTIONAL, REPEATED, REQUIRED
* @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
*/
+ @Deprecated
public Type(String name, Repetition repetition, OriginalType originalType) {
+ this(name, repetition, originalType, null);
+ }
+
+ /**
+ * @param name the name of the type
+ * @param repetition OPTIONAL, REPEATED, REQUIRED
+ * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...)
+ * @param id (optional) the id of the fields.
+ */
+ Type(String name, Repetition repetition, OriginalType originalType, ID id) {
super();
- this.name = name;
- this.repetition = repetition;
+ this.name = checkNotNull(name, "name");
+ this.repetition = checkNotNull(repetition, "repetition");
this.originalType = originalType;
+ this.id = id;
}
/**
+ * @param id
+ * @return the same type with the id field set
+ */
+ public abstract Type withId(int id);
+
+ /**
* @return the name of the type
*/
public String getName() {
@@ -117,6 +172,13 @@ abstract public class Type {
}
/**
+ * @return the id of the field (if defined)
+ */
+ public ID getId() {
+ return id;
+ }
+
+ /**
* @return the original type (LIST, MAP, ...)
*/
public OriginalType getOriginalType() {
@@ -163,21 +225,43 @@ abstract public class Type {
*/
abstract public void accept(TypeVisitor visitor);
+ @Deprecated
+ abstract protected int typeHashCode();
+
+ @Deprecated
+ abstract protected boolean typeEquals(Type other);
+
@Override
public int hashCode() {
- return typeHashCode();
+ int c = repetition.hashCode();
+ c = 31 * c + name.hashCode();
+ if (originalType != null) {
+ c = 31 * c + originalType.hashCode();
+ }
+ if (id != null) {
+ c = 31 * c + id.hashCode();
+ }
+ return c;
}
- protected abstract int typeHashCode();
-
- protected abstract boolean typeEquals(Type other);
+ protected boolean equals(Type other) {
+ return
+ name.equals(other.name)
+ && repetition == other.repetition
+ && eqOrBothNull(repetition, other.repetition)
+ && eqOrBothNull(id, other.id);
+ };
@Override
public boolean equals(Object other) {
if (!(other instanceof Type) || other == null) {
return false;
}
- return typeEquals((Type)other);
+ return equals((Type)other);
+ }
+
+ protected boolean eqOrBothNull(Object o1, Object o2) {
+ return (o1 == null && o2 == null) || (o1 != null && o1.equals(o2));
}
protected abstract int getMaxRepetitionLevel(String[] path, int i);
@@ -195,7 +279,7 @@ abstract public class Type {
* @return the union result of merging toMerge into this
*/
protected abstract Type union(Type toMerge);
-
+
/**
* @param toMerge the type to merge into this one
* @param strict should schema primitive types match
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/main/java/parquet/schema/Types.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/parquet/schema/Types.java b/parquet-column/src/main/java/parquet/schema/Types.java
index 213b856..06077df 100644
--- a/parquet-column/src/main/java/parquet/schema/Types.java
+++ b/parquet-column/src/main/java/parquet/schema/Types.java
@@ -2,8 +2,10 @@ package parquet.schema;
import java.util.ArrayList;
import java.util.List;
+
import parquet.Preconditions;
import parquet.schema.PrimitiveType.PrimitiveTypeName;
+import parquet.schema.Type.ID;
/**
* This class provides fluent builders that produce Parquet schema Types.
@@ -117,6 +119,7 @@ public class Types {
protected Type.Repetition repetition = null;
protected OriginalType originalType = null;
+ protected Type.ID id = null;
private boolean repetitionAlreadySet = false;
/**
@@ -174,6 +177,19 @@ public class Types {
return self();
}
+ /**
+ * adds an id annotation to the type being built.
+ * <p>
+ * ids are used to capture the original id when converting from models using ids (thrift, protobufs)
+ *
+ * @param id the id of the field
+ * @return this builder for method chaining
+ */
+ public T id(int id) {
+ this.id = new ID(id);
+ return self();
+ }
+
abstract protected Type build(String name);
/**
@@ -204,6 +220,7 @@ public class Types {
return returnClass.cast(type);
}
}
+
}
/**
@@ -296,7 +313,6 @@ public class Types {
primitiveType == PrimitiveTypeName.BINARY,
"UTF8 can only annotate binary fields");
break;
-
case DECIMAL:
Preconditions.checkState(
(primitiveType == PrimitiveTypeName.INT32) ||
@@ -321,11 +337,18 @@ public class Types {
"FIXED(" + length + ") cannot store " + meta.getPrecision() +
" digits (max " + maxPrecision(length) + ")");
}
+ break;
+ case ENUM:
+ Preconditions.checkState(
+ primitiveType == PrimitiveTypeName.BINARY,
+ "ENUM can only annotate binary fields");
+ break;
+ default:
+ throw new IllegalStateException(originalType + " can not be applied to a primitive type");
}
}
- return new PrimitiveType(
- repetition, primitiveType, length, name, originalType, meta);
+ return new PrimitiveType(repetition, primitiveType, length, name, originalType, meta, id);
}
private static long maxPrecision(int numBytes) {
@@ -487,7 +510,7 @@ public class Types {
protected GroupType build(String name) {
Preconditions.checkState(!fields.isEmpty(),
"Cannot build an empty group");
- return new GroupType(repetition, name, originalType, fields);
+ return new GroupType(repetition, name, originalType, fields, id);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/test/java/parquet/parser/TestParquetParser.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/parquet/parser/TestParquetParser.java b/parquet-column/src/test/java/parquet/parser/TestParquetParser.java
index 7fd1184..d1fb975 100644
--- a/parquet-column/src/test/java/parquet/parser/TestParquetParser.java
+++ b/parquet-column/src/test/java/parquet/parser/TestParquetParser.java
@@ -16,69 +16,75 @@
package parquet.parser;
import static org.junit.Assert.assertEquals;
-import static parquet.schema.PrimitiveType.PrimitiveTypeName.*;
-import static parquet.schema.Type.Repetition.*;
+import static parquet.schema.MessageTypeParser.parseMessageType;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
+import static parquet.schema.Type.Repetition.OPTIONAL;
+import static parquet.schema.Type.Repetition.REPEATED;
+import static parquet.schema.Type.Repetition.REQUIRED;
+import static parquet.schema.Types.buildMessage;
-import org.junit.Assert;
import org.junit.Test;
import parquet.schema.GroupType;
import parquet.schema.MessageType;
-import parquet.schema.MessageTypeParser;
import parquet.schema.OriginalType;
import parquet.schema.PrimitiveType;
import parquet.schema.PrimitiveType.PrimitiveTypeName;
-import parquet.schema.Types;
+import parquet.schema.Types.MessageTypeBuilder;
public class TestParquetParser {
- @Test
- public void testPaperExample() throws Exception {
- String example = "message Document {\n" +
- " required int64 DocId;\n" +
- " optional group Links {\n" +
- " repeated int64 Backward;\n" +
- " repeated int64 Forward; }\n" +
- " repeated group Name {\n" +
- " repeated group Language {\n" +
- " required binary Code;\n" +
- " required binary Country; }\n" +
- " optional binary Url; }}";
- MessageType parsed = MessageTypeParser.parseMessageType(example);
- MessageType manuallyMade =
- new MessageType("Document",
- new PrimitiveType(REQUIRED, INT64, "DocId"),
- new GroupType(OPTIONAL, "Links",
- new PrimitiveType(REPEATED, INT64, "Backward"),
- new PrimitiveType(REPEATED, INT64, "Forward")),
+ @Test
+ public void testPaperExample() throws Exception {
+ String example =
+ "message Document {\n" +
+ " required int64 DocId;\n" +
+ " optional group Links {\n" +
+ " repeated int64 Backward;\n" +
+ " repeated int64 Forward; }\n" +
+ " repeated group Name {\n" +
+ " repeated group Language {\n" +
+ " required binary Code;\n" +
+ " required binary Country; }\n" +
+ " optional binary Url; }}";
+ MessageType parsed = parseMessageType(example);
+ MessageType manuallyMade =
+ new MessageType("Document",
+ new PrimitiveType(REQUIRED, INT64, "DocId"),
+ new GroupType(OPTIONAL, "Links",
+ new PrimitiveType(REPEATED, INT64, "Backward"),
+ new PrimitiveType(REPEATED, INT64, "Forward")),
new GroupType(REPEATED, "Name",
new GroupType(REPEATED, "Language",
new PrimitiveType(REQUIRED, BINARY, "Code"),
new PrimitiveType(REQUIRED, BINARY, "Country")),
- new PrimitiveType(OPTIONAL, BINARY, "Url")));
- assertEquals(manuallyMade, parsed);
+ new PrimitiveType(OPTIONAL, BINARY, "Url")));
+ assertEquals(manuallyMade, parsed);
- MessageType parsedThenReparsed = MessageTypeParser.parseMessageType(parsed.toString());
+ MessageType parsedThenReparsed = parseMessageType(parsed.toString());
- assertEquals(manuallyMade, parsedThenReparsed);
+ assertEquals(manuallyMade, parsedThenReparsed);
- parsed = MessageTypeParser.parseMessageType("message m { required group a {required binary b;} required group c { required int64 d; }}");
- manuallyMade =
- new MessageType("m",
- new GroupType(REQUIRED, "a",
- new PrimitiveType(REQUIRED, BINARY, "b")),
+ parsed = parseMessageType("message m { required group a {required binary b;} required group c { required int64 d; }}");
+ manuallyMade =
+ new MessageType("m",
+ new GroupType(REQUIRED, "a",
+ new PrimitiveType(REQUIRED, BINARY, "b")),
new GroupType(REQUIRED, "c",
new PrimitiveType(REQUIRED, INT64, "d")));
- assertEquals(manuallyMade, parsed);
+ assertEquals(manuallyMade, parsed);
- parsedThenReparsed = MessageTypeParser.parseMessageType(parsed.toString());
+ parsedThenReparsed = parseMessageType(parsed.toString());
- assertEquals(manuallyMade, parsedThenReparsed);
- }
+ assertEquals(manuallyMade, parsedThenReparsed);
+ }
@Test
public void testEachPrimitiveType() {
- Types.MessageTypeBuilder builder = Types.buildMessage();
+ MessageTypeBuilder builder = buildMessage();
StringBuilder schema = new StringBuilder();
schema.append("message EachType {\n");
for (PrimitiveTypeName type : PrimitiveTypeName.values()) {
@@ -88,40 +94,65 @@ public class TestParquetParser {
builder.required(FIXED_LEN_BYTE_ARRAY).length(3).named("fixed_");
} else {
schema.append(" required ").append(type)
- .append(" ").append(type).append("_;\n");
+ .append(" ").append(type).append("_;\n");
builder.required(type).named(type.toString() + "_");
}
}
schema.append("}\n");
MessageType expected = builder.named("EachType");
- MessageType parsed = MessageTypeParser.parseMessageType(schema.toString());
+ MessageType parsed = parseMessageType(schema.toString());
- Assert.assertEquals(expected, parsed);
- MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString());
- Assert.assertEquals(expected, reparsed);
+ assertEquals(expected, parsed);
+ MessageType reparsed = parseMessageType(parsed.toString());
+ assertEquals(expected, reparsed);
}
@Test
public void testUTF8Annotation() {
- String message = "message StringMessage {\n" +
+ String message =
+ "message StringMessage {\n" +
" required binary string (UTF8);\n" +
"}\n";
- MessageType parsed = MessageTypeParser.parseMessageType(message);
- MessageType expected = Types.buildMessage()
+ MessageType parsed = parseMessageType(message);
+ MessageType expected = buildMessage()
.required(BINARY).as(OriginalType.UTF8).named("string")
.named("StringMessage");
- Assert.assertEquals(expected, parsed);
- MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString());
- Assert.assertEquals(expected, reparsed);
+ assertEquals(expected, parsed);
+ MessageType reparsed = parseMessageType(parsed.toString());
+ assertEquals(expected, reparsed);
+ }
+
+ @Test
+ public void testIDs() {
+ String message =
+ "message Message {\n" +
+ " required binary string (UTF8) = 6;\n" +
+ " required int32 i=1;\n" +
+ " required binary s2= 3;\n" +
+ " required binary s3 =4;\n" +
+ "}\n";
+
+ MessageType parsed = parseMessageType(message);
+ MessageType expected = buildMessage()
+ .required(BINARY).as(OriginalType.UTF8).id(6).named("string")
+ .required(INT32).id(1).named("i")
+ .required(BINARY).id(3).named("s2")
+ .required(BINARY).id(4).named("s3")
+ .named("Message");
+
+ assertEquals(expected, parsed);
+ MessageType reparsed = parseMessageType(parsed.toString());
+ assertEquals(expected, reparsed);
}
@Test
public void testMAPAnnotations() {
// this is primarily to test group annotations
- String message = "message Message {\n" +
+ String message =
+ "message Message {\n" +
" optional group aMap (MAP) {\n" +
" repeated group map (MAP_KEY_VALUE) {\n" +
" required binary key (UTF8);\n" +
@@ -130,75 +161,78 @@ public class TestParquetParser {
" }\n" +
"}\n";
- MessageType parsed = MessageTypeParser.parseMessageType(message);
- MessageType expected = Types.buildMessage()
+ MessageType parsed = parseMessageType(message);
+ MessageType expected = buildMessage()
.optionalGroup()
- .repeatedGroup()
- .required(BINARY).as(OriginalType.UTF8).named("key")
- .required(INT32).named("value")
- .named("map")
- .named("aMap")
+ .repeatedGroup()
+ .required(BINARY).as(OriginalType.UTF8).named("key")
+ .required(INT32).named("value")
+ .named("map")
+ .named("aMap")
.named("Message");
- Assert.assertEquals(expected, parsed);
- MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString());
- Assert.assertEquals(expected, reparsed);
+ assertEquals(expected, parsed);
+ MessageType reparsed = parseMessageType(parsed.toString());
+ assertEquals(expected, reparsed);
}
@Test
public void testLISTAnnotation() {
// this is primarily to test group annotations
- String message = "message Message {\n" +
+ String message =
+ "message Message {\n" +
" required group aList (LIST) {\n" +
" repeated binary string (UTF8);\n" +
" }\n" +
"}\n";
- MessageType parsed = MessageTypeParser.parseMessageType(message);
- MessageType expected = Types.buildMessage()
+ MessageType parsed = parseMessageType(message);
+ MessageType expected = buildMessage()
.requiredGroup()
- .repeated(BINARY).as(OriginalType.UTF8).named("string")
- .named("aList")
+ .repeated(BINARY).as(OriginalType.UTF8).named("string")
+ .named("aList")
.named("Message");
- Assert.assertEquals(expected, parsed);
- MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString());
- Assert.assertEquals(expected, reparsed);
+ assertEquals(expected, parsed);
+ MessageType reparsed = parseMessageType(parsed.toString());
+ assertEquals(expected, reparsed);
}
@Test
public void testDecimalFixedAnnotation() {
- String message = "message DecimalMessage {\n" +
+ String message =
+ "message DecimalMessage {\n" +
" required FIXED_LEN_BYTE_ARRAY(4) aDecimal (DECIMAL(9,2));\n" +
"}\n";
- MessageType parsed = MessageTypeParser.parseMessageType(message);
- MessageType expected = Types.buildMessage()
+ MessageType parsed = parseMessageType(message);
+ MessageType expected = buildMessage()
.required(FIXED_LEN_BYTE_ARRAY).length(4)
- .as(OriginalType.DECIMAL).precision(9).scale(2)
- .named("aDecimal")
+ .as(OriginalType.DECIMAL).precision(9).scale(2)
+ .named("aDecimal")
.named("DecimalMessage");
- Assert.assertEquals(expected, parsed);
- MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString());
- Assert.assertEquals(expected, reparsed);
+ assertEquals(expected, parsed);
+ MessageType reparsed = parseMessageType(parsed.toString());
+ assertEquals(expected, reparsed);
}
@Test
public void testDecimalBinaryAnnotation() {
- String message = "message DecimalMessage {\n" +
+ String message =
+ "message DecimalMessage {\n" +
" required binary aDecimal (DECIMAL(9,2));\n" +
"}\n";
- MessageType parsed = MessageTypeParser.parseMessageType(message);
- MessageType expected = Types.buildMessage()
+ MessageType parsed = parseMessageType(message);
+ MessageType expected = buildMessage()
.required(BINARY).as(OriginalType.DECIMAL).precision(9).scale(2)
- .named("aDecimal")
+ .named("aDecimal")
.named("DecimalMessage");
- Assert.assertEquals(expected, parsed);
- MessageType reparsed = MessageTypeParser.parseMessageType(parsed.toString());
- Assert.assertEquals(expected, reparsed);
+ assertEquals(expected, parsed);
+ MessageType reparsed = parseMessageType(parsed.toString());
+ assertEquals(expected, reparsed);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/test/java/parquet/schema/TestMessageType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/parquet/schema/TestMessageType.java b/parquet-column/src/test/java/parquet/schema/TestMessageType.java
index 46f9c07..a1d7a2f 100644
--- a/parquet-column/src/test/java/parquet/schema/TestMessageType.java
+++ b/parquet-column/src/test/java/parquet/schema/TestMessageType.java
@@ -32,7 +32,6 @@ import parquet.schema.PrimitiveType.PrimitiveTypeName;
public class TestMessageType {
@Test
public void test() throws Exception {
- System.out.println(Paper.schema.toString());
MessageType schema = MessageTypeParser.parseMessageType(Paper.schema.toString());
assertEquals(Paper.schema, schema);
assertEquals(schema.toString(), Paper.schema.toString());
@@ -130,4 +129,17 @@ public class TestMessageType {
assertEquals("can not merge type optional int32 a into optional binary a", e.getMessage());
}
}
+
+ @Test
+ public void testIDs() throws Exception {
+ MessageType schema = new MessageType("test",
+ new PrimitiveType(REQUIRED, BINARY, "foo").withId(4),
+ new GroupType(REQUIRED, "bar",
+ new PrimitiveType(REQUIRED, BINARY, "baz").withId(3)
+ ).withId(8)
+ );
+ MessageType schema2 = MessageTypeParser.parseMessageType(schema.toString());
+ assertEquals(schema, schema2);
+ assertEquals(schema.toString(), schema2.toString());
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-column/src/test/java/parquet/schema/TestTypeBuilders.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/parquet/schema/TestTypeBuilders.java b/parquet-column/src/test/java/parquet/schema/TestTypeBuilders.java
index 5616055..5309ad8 100644
--- a/parquet-column/src/test/java/parquet/schema/TestTypeBuilders.java
+++ b/parquet-column/src/test/java/parquet/schema/TestTypeBuilders.java
@@ -194,7 +194,7 @@ public class TestTypeBuilders {
// int32 primitive type
MessageType expected = new MessageType("DecimalMessage",
new PrimitiveType(REQUIRED, INT32, 0, "aDecimal",
- OriginalType.DECIMAL, new DecimalMetadata(9, 2)));
+ OriginalType.DECIMAL, new DecimalMetadata(9, 2), null));
MessageType builderType = Types.buildMessage()
.required(INT32)
.as(OriginalType.DECIMAL).precision(9).scale(2)
@@ -204,7 +204,7 @@ public class TestTypeBuilders {
// int64 primitive type
expected = new MessageType("DecimalMessage",
new PrimitiveType(REQUIRED, INT64, 0, "aDecimal",
- OriginalType.DECIMAL, new DecimalMetadata(18, 2)));
+ OriginalType.DECIMAL, new DecimalMetadata(18, 2), null));
builderType = Types.buildMessage()
.required(INT64)
.as(OriginalType.DECIMAL).precision(18).scale(2)
@@ -214,7 +214,7 @@ public class TestTypeBuilders {
// binary primitive type
expected = new MessageType("DecimalMessage",
new PrimitiveType(REQUIRED, BINARY, 0, "aDecimal",
- OriginalType.DECIMAL, new DecimalMetadata(9, 2)));
+ OriginalType.DECIMAL, new DecimalMetadata(9, 2), null));
builderType = Types.buildMessage()
.required(BINARY).as(OriginalType.DECIMAL).precision(9).scale(2)
.named("aDecimal")
@@ -223,7 +223,7 @@ public class TestTypeBuilders {
// fixed primitive type
expected = new MessageType("DecimalMessage",
new PrimitiveType(REQUIRED, FIXED_LEN_BYTE_ARRAY, 4, "aDecimal",
- OriginalType.DECIMAL, new DecimalMetadata(9, 2)));
+ OriginalType.DECIMAL, new DecimalMetadata(9, 2), null));
builderType = Types.buildMessage()
.required(FIXED_LEN_BYTE_ARRAY).length(4)
.as(OriginalType.DECIMAL).precision(9).scale(2)
@@ -236,7 +236,7 @@ public class TestTypeBuilders {
public void testDecimalAnnotationMissingScale() {
MessageType expected = new MessageType("DecimalMessage",
new PrimitiveType(REQUIRED, INT32, 0, "aDecimal",
- OriginalType.DECIMAL, new DecimalMetadata(9, 0)));
+ OriginalType.DECIMAL, new DecimalMetadata(9, 0), null));
MessageType builderType = Types.buildMessage()
.required(INT32)
.as(OriginalType.DECIMAL).precision(9)
@@ -246,7 +246,7 @@ public class TestTypeBuilders {
expected = new MessageType("DecimalMessage",
new PrimitiveType(REQUIRED, INT64, 0, "aDecimal",
- OriginalType.DECIMAL, new DecimalMetadata(9, 0)));
+ OriginalType.DECIMAL, new DecimalMetadata(9, 0), null));
builderType = Types.buildMessage()
.required(INT64)
.as(OriginalType.DECIMAL).precision(9)
@@ -256,7 +256,7 @@ public class TestTypeBuilders {
expected = new MessageType("DecimalMessage",
new PrimitiveType(REQUIRED, BINARY, 0, "aDecimal",
- OriginalType.DECIMAL, new DecimalMetadata(9, 0)));
+ OriginalType.DECIMAL, new DecimalMetadata(9, 0), null));
builderType = Types.buildMessage()
.required(BINARY).as(OriginalType.DECIMAL).precision(9)
.named("aDecimal")
@@ -265,7 +265,7 @@ public class TestTypeBuilders {
expected = new MessageType("DecimalMessage",
new PrimitiveType(REQUIRED, FIXED_LEN_BYTE_ARRAY, 7, "aDecimal",
- OriginalType.DECIMAL, new DecimalMetadata(9, 0)));
+ OriginalType.DECIMAL, new DecimalMetadata(9, 0), null));
builderType = Types.buildMessage()
.required(FIXED_LEN_BYTE_ARRAY).length(7)
.as(OriginalType.DECIMAL).precision(9)
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-hadoop/src/main/java/parquet/format/converter/ParquetMetadataConverter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/parquet/format/converter/ParquetMetadataConverter.java
index 76834d5..8b6a2b3 100644
--- a/parquet-hadoop/src/main/java/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/parquet/format/converter/ParquetMetadataConverter.java
@@ -559,6 +559,9 @@ public class ParquetMetadataConverter {
if (schemaElement.isSetConverted_type()) {
childBuilder.as(getOriginalType(schemaElement.converted_type));
}
+ if (schemaElement.isSetField_id()) {
+ childBuilder.id(schemaElement.field_id);
+ }
childBuilder.named(schemaElement.name);
}
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-pig/src/main/java/parquet/pig/PigSchemaConverter.java
----------------------------------------------------------------------
diff --git a/parquet-pig/src/main/java/parquet/pig/PigSchemaConverter.java b/parquet-pig/src/main/java/parquet/pig/PigSchemaConverter.java
index bfa7ac3..ff60aff 100644
--- a/parquet-pig/src/main/java/parquet/pig/PigSchemaConverter.java
+++ b/parquet-pig/src/main/java/parquet/pig/PigSchemaConverter.java
@@ -15,10 +15,13 @@
*/
package parquet.pig;
+import static parquet.Log.DEBUG;
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+
import org.apache.pig.LoadPushDown.RequiredField;
import org.apache.pig.LoadPushDown.RequiredFieldList;
import org.apache.pig.data.DataType;
@@ -29,10 +32,8 @@ import org.apache.pig.impl.util.ObjectSerializer;
import org.apache.pig.impl.util.Pair;
import org.apache.pig.impl.util.Utils;
import org.apache.pig.parser.ParserException;
-import parquet.Log;
-import static parquet.Log.DEBUG;
-import static parquet.pig.TupleReadSupport.PARQUET_PIG_REQUIRED_FIELDS;
+import parquet.Log;
import parquet.schema.ConversionPatterns;
import parquet.schema.GroupType;
import parquet.schema.MessageType;
@@ -62,9 +63,9 @@ public class PigSchemaConverter {
public PigSchemaConverter() {
this(false);
}
-
+
/**
- *
+ *
* @param columnIndexAccess toggle between name and index based access (default: false)
*/
public PigSchemaConverter(boolean columnIndexAccess) {
@@ -86,13 +87,13 @@ public class PigSchemaConverter {
interface ColumnAccess {
List<Type> filterTupleSchema(GroupType schemaToFilter, Schema pigSchema, RequiredFieldList requiredFieldsList);
}
-
+
class ColumnIndexAccess implements ColumnAccess {
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema pigSchema, RequiredFieldList requiredFieldsList) {
List<Type> newFields = new ArrayList<Type>();
List<Pair<FieldSchema,Integer>> indexedFields = new ArrayList<Pair<FieldSchema,Integer>>();
-
+
try {
if(requiredFieldsList == null) {
int index = 0;
@@ -114,11 +115,11 @@ public class PigSchemaConverter {
}
} catch (FrontendException e) {
throw new RuntimeException("Failed to filter requested fields", e);
- }
+ }
return newFields;
- }
+ }
}
-
+
class ColumnNameAccess implements ColumnAccess {
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema requestedPigSchema, RequiredFieldList requiredFieldsList) {
@@ -134,7 +135,7 @@ public class PigSchemaConverter {
return newFields;
}
}
-
+
/**
* @param pigSchema the pig schema to turn into a string representation
* @return the sctring representation of the schema
@@ -148,14 +149,14 @@ public class PigSchemaConverter {
if(requiredFieldString == null) {
return null;
}
-
+
try {
return (RequiredFieldList) ObjectSerializer.deserialize(requiredFieldString);
} catch (IOException e) {
throw new RuntimeException("Failed to deserialize pushProjection", e);
}
}
-
+
static String serializeRequiredFieldList(RequiredFieldList requiredFieldList) {
try {
return ObjectSerializer.serialize(requiredFieldList);
@@ -163,7 +164,7 @@ public class PigSchemaConverter {
throw new RuntimeException("Failed to searlize required fields.", e);
}
}
-
+
/**
* converts a parquet schema into a pig schema
* @param parquetSchema the parquet schema to convert to Pig schema
@@ -202,37 +203,37 @@ public class PigSchemaConverter {
private FieldSchema getSimpleFieldSchema(final String fieldName, Type parquetType)
throws FrontendException {
- final PrimitiveTypeName parquetPrimitiveTypeName =
+ final PrimitiveTypeName parquetPrimitiveTypeName =
parquetType.asPrimitiveType().getPrimitiveTypeName();
final OriginalType originalType = parquetType.getOriginalType();
return parquetPrimitiveTypeName.convert(
new PrimitiveTypeNameConverter<Schema.FieldSchema, FrontendException>() {
@Override
- public FieldSchema convertFLOAT(PrimitiveTypeName primitiveTypeName)
+ public FieldSchema convertFLOAT(PrimitiveTypeName primitiveTypeName)
throws FrontendException {
return new FieldSchema(fieldName, null, DataType.FLOAT);
}
@Override
- public FieldSchema convertDOUBLE(PrimitiveTypeName primitiveTypeName)
+ public FieldSchema convertDOUBLE(PrimitiveTypeName primitiveTypeName)
throws FrontendException {
return new FieldSchema(fieldName, null, DataType.DOUBLE);
}
@Override
- public FieldSchema convertINT32(PrimitiveTypeName primitiveTypeName)
+ public FieldSchema convertINT32(PrimitiveTypeName primitiveTypeName)
throws FrontendException {
return new FieldSchema(fieldName, null, DataType.INTEGER);
}
@Override
- public FieldSchema convertINT64(PrimitiveTypeName primitiveTypeName)
+ public FieldSchema convertINT64(PrimitiveTypeName primitiveTypeName)
throws FrontendException {
return new FieldSchema(fieldName, null, DataType.LONG);
}
@Override
- public FieldSchema convertINT96(PrimitiveTypeName primitiveTypeName)
+ public FieldSchema convertINT96(PrimitiveTypeName primitiveTypeName)
throws FrontendException {
throw new FrontendException("NYI");
}
@@ -244,13 +245,13 @@ public class PigSchemaConverter {
}
@Override
- public FieldSchema convertBOOLEAN(PrimitiveTypeName primitiveTypeName)
+ public FieldSchema convertBOOLEAN(PrimitiveTypeName primitiveTypeName)
throws FrontendException {
return new FieldSchema(fieldName, null, DataType.BOOLEAN);
}
@Override
- public FieldSchema convertBINARY(PrimitiveTypeName primitiveTypeName)
+ public FieldSchema convertBINARY(PrimitiveTypeName primitiveTypeName)
throws FrontendException {
if (originalType != null && originalType == OriginalType.UTF8) {
return new FieldSchema(fieldName, null, DataType.CHARARRAY);
@@ -437,7 +438,7 @@ public class PigSchemaConverter {
public MessageType filter(MessageType schemaToFilter, Schema requestedPigSchema) {
return filter(schemaToFilter, requestedPigSchema, null);
}
-
+
/**
* filters a Parquet schema based on a pig schema for projection
* @param schemaToFilter the schema to be filter
@@ -454,7 +455,7 @@ public class PigSchemaConverter {
} catch (RuntimeException e) {
throw new RuntimeException("can't filter " + schemaToFilter + " with " + requestedPigSchema, e);
}
- }
+ }
private Type filter(Type type, FieldSchema fieldSchema) {
if (DEBUG) LOG.debug("filtering type:\n" + type + "\nwith:\n " + fieldSchema);
@@ -478,7 +479,7 @@ public class PigSchemaConverter {
private Type filterTuple(GroupType tupleType, FieldSchema tupleFieldSchema) throws FrontendException {
if (DEBUG) LOG.debug("filtering TUPLE schema:\n" + tupleType + "\nwith:\n " + tupleFieldSchema);
- return new GroupType(tupleType.getRepetition(), tupleType.getName(), tupleType.getOriginalType(), columnAccess.filterTupleSchema(tupleType, tupleFieldSchema.schema, null));
+ return tupleType.withNewFields(columnAccess.filterTupleSchema(tupleType, tupleFieldSchema.schema, null));
}
private Type filterMap(GroupType mapType, FieldSchema mapFieldSchema) throws FrontendException {
@@ -491,13 +492,7 @@ public class PigSchemaConverter {
throw new RuntimeException("this should be a Map Key/Value: " + mapType);
}
FieldSchema innerField = mapFieldSchema.schema.getField(0);
- return new GroupType(
- mapType.getRepetition(), mapType.getName(), mapType.getOriginalType(),
- new GroupType(nested.getRepetition(), nested.getName(), nested.getOriginalType(),
- nested.getType(0),
- filter(nested.getType(1), innerField)
- )
- );
+ return mapType.withNewFields(nested.withNewFields(nested.getType(0), filter(nested.getType(1), innerField)));
}
private Type filterBag(GroupType bagType, FieldSchema bagFieldSchema) throws FrontendException {
@@ -511,9 +506,6 @@ public class PigSchemaConverter {
// Bags always contain tuples => we skip the extra tuple that was inserted in that case.
innerField = innerField.schema.getField(0);
}
- return new GroupType(
- bagType.getRepetition(), bagType.getName(), bagType.getOriginalType(),
- filter(nested, innerField)
- );
+ return bagType.withNewFields(filter(nested, innerField));
}
}
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-protobuf/src/main/java/parquet/proto/ProtoSchemaConverter.java
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/main/java/parquet/proto/ProtoSchemaConverter.java b/parquet-protobuf/src/main/java/parquet/proto/ProtoSchemaConverter.java
index 4b0530d..f349b60 100644
--- a/parquet-protobuf/src/main/java/parquet/proto/ProtoSchemaConverter.java
+++ b/parquet-protobuf/src/main/java/parquet/proto/ProtoSchemaConverter.java
@@ -15,22 +15,28 @@
*/
package parquet.proto;
-import com.google.protobuf.Descriptors;
-import com.google.protobuf.Message;
-import com.twitter.elephantbird.util.Protobufs;
+import static parquet.schema.OriginalType.ENUM;
+import static parquet.schema.OriginalType.UTF8;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
+
+import java.util.List;
+
import parquet.Log;
-import parquet.schema.ConversionPatterns;
-import parquet.schema.GroupType;
import parquet.schema.MessageType;
-import parquet.schema.OriginalType;
-import parquet.schema.PrimitiveType;
import parquet.schema.Type;
+import parquet.schema.Types;
+import parquet.schema.Types.Builder;
+import parquet.schema.Types.GroupBuilder;
-import java.util.ArrayList;
-import java.util.List;
-
-import static com.google.protobuf.Descriptors.FieldDescriptor.JavaType;
-import static parquet.schema.PrimitiveType.PrimitiveTypeName;
+import com.google.protobuf.Descriptors;
+import com.google.protobuf.Descriptors.FieldDescriptor.JavaType;
+import com.google.protobuf.Message;
+import com.twitter.elephantbird.util.Protobufs;
/**
* <p/>
@@ -45,72 +51,54 @@ public class ProtoSchemaConverter {
public MessageType convert(Class<? extends Message> protobufClass) {
LOG.debug("Converting protocol buffer class \"" + protobufClass + "\" to parquet schema.");
Descriptors.Descriptor descriptor = Protobufs.getMessageDescriptor(protobufClass);
-
- MessageType messageType = new MessageType(descriptor.getFullName(), convertFields(descriptor.getFields()));
-
+ MessageType messageType =
+ convertFields(Types.buildMessage(), descriptor.getFields())
+ .named(descriptor.getFullName());
LOG.debug("Converter info:\n " + descriptor.toProto() + " was converted to \n" + messageType);
return messageType;
}
/* Iterates over list of fields. **/
- private List<Type> convertFields(List<Descriptors.FieldDescriptor> fieldDescriptors) {
- List<Type> types = new ArrayList<Type>();
-
+ private <T> GroupBuilder<T> convertFields(GroupBuilder<T> groupBuilder, List<Descriptors.FieldDescriptor> fieldDescriptors) {
for (Descriptors.FieldDescriptor fieldDescriptor : fieldDescriptors) {
- String fieldName = fieldDescriptor.getName();
- Type.Repetition repetition = getRepetition(fieldDescriptor);
- Type type = convertScalarField(fieldName, fieldDescriptor, repetition);
- types.add(type);
+ groupBuilder =
+ addField(fieldDescriptor, groupBuilder)
+ .id(fieldDescriptor.getNumber())
+ .named(fieldDescriptor.getName());
}
- return types;
+ return groupBuilder;
}
private Type.Repetition getRepetition(Descriptors.FieldDescriptor descriptor) {
- Type.Repetition repetition;
if (descriptor.isRequired()) {
- repetition = Type.Repetition.REQUIRED;
+ return Type.Repetition.REQUIRED;
} else if (descriptor.isRepeated()) {
- repetition = Type.Repetition.REPEATED;
+ return Type.Repetition.REPEATED;
} else {
- repetition = Type.Repetition.OPTIONAL;
+ return Type.Repetition.OPTIONAL;
}
- return repetition;
}
- private Type convertScalarField(String fieldName, Descriptors.FieldDescriptor descriptor, Type.Repetition repetition) {
-
+ private <T> Builder<? extends Builder<?, GroupBuilder<T>>, GroupBuilder<T>> addField(Descriptors.FieldDescriptor descriptor, GroupBuilder<T> builder) {
+ Type.Repetition repetition = getRepetition(descriptor);
JavaType javaType = descriptor.getJavaType();
-
switch (javaType) {
- case BOOLEAN : return primitive(fieldName, PrimitiveTypeName.BOOLEAN, repetition);
- case INT : return primitive(fieldName, PrimitiveTypeName.INT32, repetition);
- case LONG : return primitive(fieldName, PrimitiveTypeName.INT64, repetition);
- case FLOAT : return primitive(fieldName, PrimitiveTypeName.FLOAT, repetition);
- case DOUBLE: return primitive(fieldName, PrimitiveTypeName.DOUBLE, repetition);
- case BYTE_STRING: return primitive(fieldName, PrimitiveTypeName.BINARY, repetition);
- case STRING: return primitive(fieldName, PrimitiveTypeName.BINARY, repetition, OriginalType.UTF8);
- case MESSAGE: {
- Descriptors.Descriptor messageDescriptor = descriptor.getMessageType();
- List<Type> fields = convertFields(messageDescriptor.getFields());
- return new GroupType(repetition, fieldName, fields);
- }
- case ENUM: return primitive(fieldName, PrimitiveTypeName.BINARY, repetition, OriginalType.ENUM);
+ case BOOLEAN : return builder.primitive(BOOLEAN, repetition);
+ case INT : return builder.primitive(INT32, repetition);
+ case LONG : return builder.primitive(INT64, repetition);
+ case FLOAT : return builder.primitive(FLOAT, repetition);
+ case DOUBLE: return builder.primitive(DOUBLE, repetition);
+ case BYTE_STRING: return builder.primitive(BINARY, repetition);
+ case STRING: return builder.primitive(BINARY, repetition).as(UTF8);
+ case MESSAGE: {
+ GroupBuilder<GroupBuilder<T>> group = builder.group(repetition);
+ convertFields(group, descriptor.getMessageType().getFields());
+ return group;
+ }
+ case ENUM: return builder.primitive(BINARY, repetition).as(ENUM);
+ default:
+ throw new UnsupportedOperationException("Cannot convert Protocol Buffer: unknown type " + javaType);
}
-
- throw new UnsupportedOperationException("Cannot convert Protocol Buffer: unknown type " + javaType + " fieldName " + fieldName);
- }
-
- /**
- * Makes primitive type with additional information. Used for String and Binary types
- */
- private Type primitive(String name, PrimitiveTypeName primitive,
- Type.Repetition repetition, OriginalType originalType) {
- return new PrimitiveType(repetition, primitive, name, originalType);
- }
-
- private PrimitiveType primitive(String name, PrimitiveTypeName
- primitive, Type.Repetition repetition) {
- return new PrimitiveType(repetition, primitive, name, null);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-protobuf/src/test/java/parquet/proto/ProtoSchemaConverterTest.java
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/test/java/parquet/proto/ProtoSchemaConverterTest.java b/parquet-protobuf/src/test/java/parquet/proto/ProtoSchemaConverterTest.java
index 0062e54..7163f51 100644
--- a/parquet-protobuf/src/test/java/parquet/proto/ProtoSchemaConverterTest.java
+++ b/parquet-protobuf/src/test/java/parquet/proto/ProtoSchemaConverterTest.java
@@ -45,28 +45,28 @@ public class ProtoSchemaConverterTest {
public void testConvertAllDatatypes() throws Exception {
String expectedSchema =
"message TestProtobuf.SchemaConverterAllDatatypes {\n" +
- " optional double optionalDouble;\n" +
- " optional float optionalFloat;\n" +
- " optional int32 optionalInt32;\n" +
- " optional int64 optionalInt64;\n" +
- " optional int32 optionalUInt32;\n" +
- " optional int64 optionalUInt64;\n" +
- " optional int32 optionalSInt32;\n" +
- " optional int64 optionalSInt64;\n" +
- " optional int32 optionalFixed32;\n" +
- " optional int64 optionalFixed64;\n" +
- " optional int32 optionalSFixed32;\n" +
- " optional int64 optionalSFixed64;\n" +
- " optional boolean optionalBool;\n" +
- " optional binary optionalString (UTF8);\n" +
- " optional binary optionalBytes;\n" +
- " optional group optionalMessage {\n" +
- " optional int32 someId;\n" +
+ " optional double optionalDouble = 1;\n" +
+ " optional float optionalFloat = 2;\n" +
+ " optional int32 optionalInt32 = 3;\n" +
+ " optional int64 optionalInt64 = 4;\n" +
+ " optional int32 optionalUInt32 = 5;\n" +
+ " optional int64 optionalUInt64 = 6;\n" +
+ " optional int32 optionalSInt32 = 7;\n" +
+ " optional int64 optionalSInt64 = 8;\n" +
+ " optional int32 optionalFixed32 = 9;\n" +
+ " optional int64 optionalFixed64 = 10;\n" +
+ " optional int32 optionalSFixed32 = 11;\n" +
+ " optional int64 optionalSFixed64 = 12;\n" +
+ " optional boolean optionalBool = 13;\n" +
+ " optional binary optionalString (UTF8) = 14;\n" +
+ " optional binary optionalBytes = 15;\n" +
+ " optional group optionalMessage = 16 {\n" +
+ " optional int32 someId = 3;\n" +
" }\n" +
- " optional group pbgroup {\n" +
- " optional int32 groupInt;\n" +
+ " optional group pbgroup = 17 {\n" +
+ " optional int32 groupInt = 2;\n" +
" }\n" +
- " optional binary optionalEnum (ENUM);" +
+ " optional binary optionalEnum (ENUM) = 18;" +
"}";
testConversion(TestProtobuf.SchemaConverterAllDatatypes.class, expectedSchema);
@@ -76,17 +76,17 @@ public class ProtoSchemaConverterTest {
public void testConvertRepetition() throws Exception {
String expectedSchema =
"message TestProtobuf.SchemaConverterRepetition {\n" +
- " optional int32 optionalPrimitive;\n" +
- " required int32 requiredPrimitive;\n" +
- " repeated int32 repeatedPrimitive;\n" +
- " optional group optionalMessage {\n" +
- " optional int32 someId;\n" +
+ " optional int32 optionalPrimitive = 1;\n" +
+ " required int32 requiredPrimitive = 2;\n" +
+ " repeated int32 repeatedPrimitive = 3;\n" +
+ " optional group optionalMessage = 7 {\n" +
+ " optional int32 someId = 3;\n" +
" }\n" +
- " required group requiredMessage {" +
- " optional int32 someId;\n" +
+ " required group requiredMessage = 8 {" +
+ " optional int32 someId= 3;\n" +
" }\n" +
- " repeated group repeatedMessage {" +
- " optional int32 someId;\n" +
+ " repeated group repeatedMessage = 9 {" +
+ " optional int32 someId = 3;\n" +
" }\n" +
"}";
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-thrift/src/main/java/parquet/thrift/ThriftSchemaConvertVisitor.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/parquet/thrift/ThriftSchemaConvertVisitor.java b/parquet-thrift/src/main/java/parquet/thrift/ThriftSchemaConvertVisitor.java
index 81a189b..f780dc5 100644
--- a/parquet-thrift/src/main/java/parquet/thrift/ThriftSchemaConvertVisitor.java
+++ b/parquet-thrift/src/main/java/parquet/thrift/ThriftSchemaConvertVisitor.java
@@ -16,21 +16,36 @@
package parquet.thrift;
-import parquet.schema.*;
+import static parquet.schema.ConversionPatterns.listType;
+import static parquet.schema.ConversionPatterns.mapType;
+import static parquet.schema.OriginalType.ENUM;
+import static parquet.schema.OriginalType.UTF8;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
+import static parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
+import static parquet.schema.Type.Repetition.OPTIONAL;
+import static parquet.schema.Type.Repetition.REPEATED;
+import static parquet.schema.Type.Repetition.REQUIRED;
+import static parquet.schema.Types.primitive;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import parquet.schema.GroupType;
+import parquet.schema.MessageType;
+import parquet.schema.OriginalType;
+import parquet.schema.PrimitiveType;
+import parquet.schema.PrimitiveType.PrimitiveTypeName;
+import parquet.schema.Type;
+import parquet.schema.Types.PrimitiveBuilder;
import parquet.thrift.projection.FieldProjectionFilter;
import parquet.thrift.projection.FieldsPath;
import parquet.thrift.projection.ThriftProjectionException;
import parquet.thrift.struct.ThriftField;
import parquet.thrift.struct.ThriftType;
-import java.util.ArrayList;
-import java.util.List;
-
-import static parquet.schema.OriginalType.ENUM;
-import static parquet.schema.OriginalType.UTF8;
-import static parquet.schema.PrimitiveType.PrimitiveTypeName.*;
-import static parquet.schema.Type.Repetition.*;
-
/**
* Visitor Class for converting a thrift definiton to parquet message type.
* Projection can be done by providing a {@link FieldProjectionFilter}
@@ -85,7 +100,7 @@ public class ThriftSchemaConvertVisitor implements ThriftType.TypeVisitor {
//restore Env
currentName = mapName;
currentRepetition = mapRepetition;
- currentType = ConversionPatterns.mapType(currentRepetition, currentName,
+ currentType = mapType(currentRepetition, currentName,
keyType,
valueType);
}
@@ -98,11 +113,11 @@ public class ThriftSchemaConvertVisitor implements ThriftType.TypeVisitor {
currentName = currentName + "_tuple";
currentRepetition = REPEATED;
setElemField.getType().accept(this);
- //after convertion, currentType is the nested type
+ //after conversion, currentType is the nested type
if (currentType == null) {
return;
} else {
- currentType = ConversionPatterns.listType(setRepetition, setName, currentType);
+ currentType = listType(setRepetition, setName, currentType);
}
}
@@ -114,11 +129,11 @@ public class ThriftSchemaConvertVisitor implements ThriftType.TypeVisitor {
currentName = currentName + "_tuple";
currentRepetition = REPEATED;
setElemField.getType().accept(this);
- //after convertion, currentType is the nested type
+ //after conversion, currentType is the nested type
if (currentType == null) {
return;
} else {
- currentType = ConversionPatterns.listType(listRepetition, listName, currentType);
+ currentType = listType(listRepetition, listName, currentType);
}
}
@@ -128,7 +143,7 @@ public class ThriftSchemaConvertVisitor implements ThriftType.TypeVisitor {
if (currentType == null)
return new MessageType(currentName, new ArrayList<Type>());
- GroupType rootType = (GroupType) currentType;
+ GroupType rootType = currentType.asGroupType();
return new MessageType(currentName, rootType.getFields());
}
@@ -160,7 +175,8 @@ public class ThriftSchemaConvertVisitor implements ThriftType.TypeVisitor {
currentFieldPath.push(field);
field.getType().accept(this);
if (currentType != null) {
- types.add(currentType);//currentType is converted with the currentName(fieldName)
+ // currentType is converted with the currentName(fieldName)
+ types.add(currentType.withId(field.getFieldId()));
}
currentFieldPath.pop();
}
@@ -169,66 +185,64 @@ public class ThriftSchemaConvertVisitor implements ThriftType.TypeVisitor {
private boolean isCurrentlyMatchedFilter(){
if(!fieldProjectionFilter.isMatched(currentFieldPath)){
- currentType=null;
+ currentType = null;
return false;
}
return true;
}
+ private void primitiveType(PrimitiveTypeName type) {
+ primitiveType(type, null);
+ }
+
+ private void primitiveType(PrimitiveTypeName type, OriginalType orig) {
+ if (isCurrentlyMatchedFilter()) {
+ PrimitiveBuilder<PrimitiveType> b = primitive(type, currentRepetition);
+ if (orig != null) {
+ b = b.as(orig);
+ }
+ currentType = b.named(currentName);
+ }
+ }
+
@Override
public void visit(ThriftType.EnumType enumType) {
- if (isCurrentlyMatchedFilter()){
- currentType = new PrimitiveType(currentRepetition, BINARY, currentName, ENUM);
- }
+ primitiveType(BINARY, ENUM);
}
@Override
public void visit(ThriftType.BoolType boolType) {
- if (isCurrentlyMatchedFilter()){
- currentType = new PrimitiveType(currentRepetition, BOOLEAN, currentName);
- }
+ primitiveType(BOOLEAN);
}
@Override
public void visit(ThriftType.ByteType byteType) {
- if (isCurrentlyMatchedFilter()){
- currentType = new PrimitiveType(currentRepetition, INT32, currentName);
- }
+ primitiveType(INT32);
}
@Override
public void visit(ThriftType.DoubleType doubleType) {
- if (isCurrentlyMatchedFilter()){
- currentType = new PrimitiveType(currentRepetition, DOUBLE, currentName);
- }
+ primitiveType(DOUBLE);
}
@Override
public void visit(ThriftType.I16Type i16Type) {
- if (isCurrentlyMatchedFilter()){
- currentType = new PrimitiveType(currentRepetition, INT32, currentName);
- }
+ primitiveType(INT32);
}
@Override
public void visit(ThriftType.I32Type i32Type) {
- if (isCurrentlyMatchedFilter()){
- currentType = new PrimitiveType(currentRepetition, INT32, currentName);
- }
+ primitiveType(INT32);
}
@Override
public void visit(ThriftType.I64Type i64Type) {
- if (isCurrentlyMatchedFilter()){
- currentType = new PrimitiveType(currentRepetition, INT64, currentName);
- }
+ primitiveType(INT64);
}
@Override
public void visit(ThriftType.StringType stringType) {
- if (isCurrentlyMatchedFilter()){
- currentType = new PrimitiveType(currentRepetition, BINARY, currentName, UTF8);
- }
+ primitiveType(BINARY, UTF8);
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-thrift/src/main/java/parquet/thrift/projection/FieldsPath.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/parquet/thrift/projection/FieldsPath.java b/parquet-thrift/src/main/java/parquet/thrift/projection/FieldsPath.java
index b88ad63..42e10f3 100644
--- a/parquet-thrift/src/main/java/parquet/thrift/projection/FieldsPath.java
+++ b/parquet-thrift/src/main/java/parquet/thrift/projection/FieldsPath.java
@@ -15,12 +15,11 @@
*/
package parquet.thrift.projection;
-import com.twitter.elephantbird.thrift.TStructDescriptor;
+import java.util.ArrayList;
+
import parquet.thrift.struct.ThriftField;
import parquet.thrift.struct.ThriftType;
-import java.util.ArrayList;
-
/**
* represent field path for thrift field
*
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/3a082e8e/parquet-thrift/src/test/java/parquet/thrift/TestThriftSchemaConverter.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/java/parquet/thrift/TestThriftSchemaConverter.java b/parquet-thrift/src/test/java/parquet/thrift/TestThriftSchemaConverter.java
index 1bb79cf..c0f80bf 100644
--- a/parquet-thrift/src/test/java/parquet/thrift/TestThriftSchemaConverter.java
+++ b/parquet-thrift/src/test/java/parquet/thrift/TestThriftSchemaConverter.java
@@ -15,19 +15,22 @@
*/
package parquet.thrift;
-import com.twitter.data.proto.tutorial.thrift.AddressBook;
-import com.twitter.data.proto.tutorial.thrift.Person;
-import com.twitter.elephantbird.thrift.test.TestStructInMap;
+import static org.junit.Assert.assertEquals;
+import static parquet.schema.MessageTypeParser.parseMessageType;
+
+import org.apache.thrift.TBase;
import org.junit.Test;
+
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;
import parquet.thrift.projection.FieldProjectionFilter;
import parquet.thrift.projection.ThriftProjectionException;
import parquet.thrift.struct.ThriftType;
import parquet.thrift.struct.ThriftType.StructType;
-import parquet.thrift.test.TestPerson;
-import static org.junit.Assert.assertEquals;
+import com.twitter.data.proto.tutorial.thrift.AddressBook;
+import com.twitter.data.proto.tutorial.thrift.Person;
+import com.twitter.elephantbird.thrift.test.TestStructInMap;
public class TestThriftSchemaConverter {
@@ -35,18 +38,18 @@ public class TestThriftSchemaConverter {
public void testToMessageType() throws Exception {
String expected =
"message ParquetSchema {\n" +
- " optional group persons (LIST) {\n" +
+ " optional group persons (LIST) = 1 {\n" +
" repeated group persons_tuple {\n" +
- " required group name {\n" +
- " optional binary first_name (UTF8);\n" +
- " optional binary last_name (UTF8);\n" +
+ " required group name = 1 {\n" +
+ " optional binary first_name (UTF8) = 1;\n" +
+ " optional binary last_name (UTF8) = 2;\n" +
" }\n" +
- " optional int32 id;\n" +
- " optional binary email (UTF8);\n" +
- " optional group phones (LIST) {\n" +
+ " optional int32 id = 2;\n" +
+ " optional binary email (UTF8) = 3;\n" +
+ " optional group phones (LIST) = 4 {\n" +
" repeated group phones_tuple {\n" +
- " optional binary number (UTF8);\n" +
- " optional binary type (ENUM);\n" +
+ " optional binary number (UTF8) = 1;\n" +
+ " optional binary type (ENUM) = 2;\n" +
" }\n" +
" }\n" +
" }\n" +
@@ -61,58 +64,58 @@ public class TestThriftSchemaConverter {
public void testToProjectedThriftType() {
shouldGetProjectedSchema("name/first_name", "message ParquetSchema {" +
- " required group name {" +
- " optional binary first_name (UTF8);" +
+ " required group name = 1 {" +
+ " optional binary first_name (UTF8) = 1;" +
" }}", Person.class);
shouldGetProjectedSchema("name/first_name;name/last_name", "message ParquetSchema {" +
- " required group name {" +
- " optional binary first_name (UTF8);" +
- " optional binary last_name (UTF8);" +
+ " required group name = 1 {" +
+ " optional binary first_name (UTF8) = 1;" +
+ " optional binary last_name (UTF8) = 2;" +
" }}", Person.class);
shouldGetProjectedSchema("name/{first,last}_name;", "message ParquetSchema {" +
- " required group name {" +
- " optional binary first_name (UTF8);" +
- " optional binary last_name (UTF8);" +
+ " required group name = 1 {" +
+ " optional binary first_name (UTF8) = 1;" +
+ " optional binary last_name (UTF8) = 2;" +
" }}", Person.class);
shouldGetProjectedSchema("name/*", "message ParquetSchema {" +
- " required group name {" +
- " optional binary first_name (UTF8);" +
- " optional binary last_name (UTF8);" +
+ " required group name = 1 {" +
+ " optional binary first_name (UTF8) = 1;" +
+ " optional binary last_name (UTF8) = 2;" +
" }" +
"}", Person.class);
shouldGetProjectedSchema("name/*", "message ParquetSchema {" +
- " required group name {" +
- " optional binary first_name (UTF8);" +
- " optional binary last_name (UTF8);" +
+ " required group name = 1 {" +
+ " optional binary first_name (UTF8) = 1;" +
+ " optional binary last_name (UTF8) = 2;" +
" }" +
"}", Person.class);
shouldGetProjectedSchema("*/*_name", "message ParquetSchema {" +
- " required group name {" +
- " optional binary first_name (UTF8);" +
- " optional binary last_name (UTF8);" +
+ " required group name = 1 {" +
+ " optional binary first_name (UTF8) = 1;" +
+ " optional binary last_name (UTF8) = 2;" +
" }" +
"}", Person.class);
shouldGetProjectedSchema("name/first_*", "message ParquetSchema {" +
- " required group name {" +
- " optional binary first_name (UTF8);" +
+ " required group name = 1 {" +
+ " optional binary first_name (UTF8) = 1;" +
" }" +
"}", Person.class);
shouldGetProjectedSchema("*/*", "message ParquetSchema {" +
- " required group name {" +
- " optional binary first_name (UTF8);" +
- " optional binary last_name (UTF8);" +
+ " required group name = 1 {" +
+ " optional binary first_name (UTF8) = 1;" +
+ " optional binary last_name (UTF8) = 2;" +
"} " +
- " optional group phones (LIST) {" +
+ " optional group phones (LIST) = 4 {" +
" repeated group phones_tuple {" +
- " optional binary number (UTF8);" +
- " optional binary type (ENUM);" +
+ " optional binary number (UTF8) = 1;" +
+ " optional binary type (ENUM) = 2;" +
" }" +
"}}", Person.class);
@@ -147,16 +150,16 @@ public class TestThriftSchemaConverter {
public void testProjectMapThriftType() {
//project nested map
shouldGetProjectedSchema("name;names/key*;names/value/**", "message ParquetSchema {\n" +
- " optional binary name (UTF8);\n" +
- " optional group names (MAP) {\n" +
+ " optional binary name (UTF8) = 1;\n" +
+ " optional group names (MAP) = 2 {\n" +
" repeated group map (MAP_KEY_VALUE) {\n" +
" required binary key (UTF8);\n" +
" optional group value {\n" +
- " optional group name {\n" +
- " optional binary first_name (UTF8);\n" +
- " optional binary last_name (UTF8);\n" +
+ " optional group name = 1 {\n" +
+ " optional binary first_name (UTF8) = 1;\n" +
+ " optional binary last_name (UTF8) = 2;\n" +
" }\n" +
- " optional group phones (MAP) {\n" +
+ " optional group phones (MAP) = 2 {\n" +
" repeated group map (MAP_KEY_VALUE) {\n" +
" required binary key (ENUM);\n" +
" optional binary value (UTF8);\n" +
@@ -169,14 +172,14 @@ public class TestThriftSchemaConverter {
//project only one level of nested map
shouldGetProjectedSchema("name;names/key;names/value/name/*", "message ParquetSchema {\n" +
- " optional binary name (UTF8);\n" +
- " optional group names (MAP) {\n" +
+ " optional binary name (UTF8) = 1;\n" +
+ " optional group names (MAP) = 2 {\n" +
" repeated group map (MAP_KEY_VALUE) {\n" +
" required binary key (UTF8);\n" +
" optional group value {\n" +
- " optional group name {\n" +
- " optional binary first_name (UTF8);\n" +
- " optional binary last_name (UTF8);\n" +
+ " optional group name = 1 {\n" +
+ " optional binary first_name (UTF8) = 1;\n" +
+ " optional binary last_name (UTF8) = 2;\n" +
" }\n" +
" }\n" +
" }\n" +
@@ -187,8 +190,8 @@ public class TestThriftSchemaConverter {
@Test
public void testProjectOnlyKeyInMap() {
shouldGetProjectedSchema("name;names/key","message ParquetSchema {\n" +
- " optional binary name (UTF8);\n" +
- " optional group names (MAP) {\n" +
+ " optional binary name (UTF8) = 1;\n" +
+ " optional group names (MAP) = 2 {\n" +
" repeated group map (MAP_KEY_VALUE) {\n" +
" required binary key (UTF8);\n" +
" }\n" +
@@ -201,13 +204,13 @@ public class TestThriftSchemaConverter {
System.out.println(getFilteredSchema("name;names/value/**", TestStructInMap.class));
}
- private void shouldGetProjectedSchema(String filterDesc, String expectedSchemaStr, Class thriftClass) {
+ private void shouldGetProjectedSchema(String filterDesc, String expectedSchemaStr, Class<? extends TBase<?,?>> thriftClass) {
MessageType requestedSchema = getFilteredSchema(filterDesc, thriftClass);
- MessageType expectedSchema = MessageTypeParser.parseMessageType(expectedSchemaStr);
+ MessageType expectedSchema = parseMessageType(expectedSchemaStr);
assertEquals(expectedSchema, requestedSchema);
}
- private MessageType getFilteredSchema(String filterDesc, Class thriftClass) {
+ private MessageType getFilteredSchema(String filterDesc, Class<? extends TBase<?,?>> thriftClass) {
FieldProjectionFilter fieldProjectionFilter = new FieldProjectionFilter(filterDesc);
return new ThriftSchemaConverter(fieldProjectionFilter).convert(thriftClass);
}