You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by zi...@apache.org on 2018/01/19 15:54:37 UTC

parquet-mr git commit: PARQUET-1191: Type.hashCode() takes originalType into account but Type.equals() does not

Repository: parquet-mr
Updated Branches:
  refs/heads/master b80b1844e -> 878ebcd0b


PARQUET-1191: Type.hashCode() takes originalType into account but Type.equals() does not

Author: Nandor Kollar <nk...@cloudera.com>

Closes #450 from nandorKollar/PARQUET-1191 and squashes the following commits:

c7131df [Nandor Kollar] PARQUET-1191: Type.hashCode() takes originalType into account but Type.equals() does not


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/878ebcd0
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/878ebcd0
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/878ebcd0

Branch: refs/heads/master
Commit: 878ebcd0bc2592fa9d5dda01117c07bc3c40bb33
Parents: b80b184
Author: Nandor Kollar <nk...@cloudera.com>
Authored: Fri Jan 19 16:53:42 2018 +0100
Committer: Zoltan Ivanfi <zi...@cloudera.com>
Committed: Fri Jan 19 16:53:42 2018 +0100

----------------------------------------------------------------------
 .../src/main/java/org/apache/parquet/schema/Type.java  |  4 ++--
 .../org/apache/parquet/pig/TestPigSchemaConverter.java | 13 +++++--------
 .../apache/parquet/scrooge/ScroogeStructConverter.java |  8 +++++++-
 .../apache/parquet/thrift/ThriftSchemaConverter.java   |  2 ++
 .../org/apache/parquet/hadoop/thrift/TestBinary.java   |  5 +----
 5 files changed, 17 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/878ebcd0/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
index 176b9a6..dd2c38d 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
@@ -20,7 +20,6 @@ package org.apache.parquet.schema;
 
 import static org.apache.parquet.Preconditions.checkNotNull;
 
-import java.io.Serializable;
 import java.util.List;
 
 import org.apache.parquet.io.InvalidRecordException;
@@ -262,7 +261,8 @@ abstract public class Type {
         name.equals(other.name)
         && repetition == other.repetition
         && eqOrBothNull(repetition, other.repetition)
-        && eqOrBothNull(id, other.id);
+        && eqOrBothNull(id, other.id)
+        && eqOrBothNull(originalType, other.originalType);
   };
 
   @Override

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/878ebcd0/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java
----------------------------------------------------------------------
diff --git a/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java b/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java
index 646e117..64d5961 100644
--- a/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java
+++ b/parquet-pig/src/test/java/org/apache/parquet/pig/TestPigSchemaConverter.java
@@ -18,7 +18,7 @@
  */
 package org.apache.parquet.pig;
 
-import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.*;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
 import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
 import static org.junit.Assert.assertEquals;
 import static org.apache.parquet.pig.PigSchemaConverter.pigSchemaToString;
@@ -31,12 +31,9 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.parquet.schema.PrimitiveType;
-import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 import org.apache.parquet.schema.Type;
 import org.apache.parquet.schema.GroupType;
 import org.apache.parquet.schema.OriginalType;
-import org.apache.parquet.schema.PrimitiveType;
-import org.apache.parquet.schema.Type;
 import org.apache.parquet.schema.Types;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
 import org.apache.pig.impl.util.Utils;
@@ -215,7 +212,7 @@ public class TestPigSchemaConverter {
         "}\n",
         "a:{" + PigSchemaConverter.ARRAY_VALUE_NAME + ":(b: chararray)}");
   }
-  
+
   private void testFixedConversion(String schemaString, String pigSchemaString)
       throws Exception {
     Schema expectedPigSchema = Utils.getSchemaFromString(pigSchemaString);
@@ -224,7 +221,7 @@ public class TestPigSchemaConverter {
     assertEquals("converting " + schemaString + " to " + pigSchemaString,
                  expectedPigSchema, pigSchema);
   }
-  
+
   @Test
   public void testMapWithFixed() throws Exception {
     testFixedConversion(
@@ -272,11 +269,11 @@ public class TestPigSchemaConverter {
   }
 
   @Test
-  public void testAnnonymousField() throws Exception {
+  public void testAnonymousField() throws Exception {
     testConversion(
         "a:chararray, int",
         "message pig_schema {\n" +
-        "  optional binary a;\n" +
+        "  optional binary a (UTF8);\n" +
         "  optional int32 val_0;\n" +
         "}\n");
   }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/878ebcd0/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeStructConverter.java
----------------------------------------------------------------------
diff --git a/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeStructConverter.java b/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeStructConverter.java
index c7448e3..310bb4c 100644
--- a/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeStructConverter.java
+++ b/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeStructConverter.java
@@ -189,7 +189,13 @@ public class ScroogeStructConverter {
         thriftType = new ThriftType.I64Type();
         break;
       case STRING:
-        thriftType = new ThriftType.StringType();
+        ThriftType.StringType stringType = new ThriftType.StringType();
+        // There is no real binary type (see THRIFT-1920) in Thrift,
+        // binary data is represented by String type with an additional binary flag.
+        if (!String.class.equals(scroogeField.manifest().runtimeClass())) {
+          stringType.setBinary(true);
+        }
+        thriftType = stringType;
         break;
       case STRUCT:
         thriftType = convertStructTypeField(scroogeField);

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/878ebcd0/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java
index c3a166a..7717e04 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java
@@ -165,6 +165,8 @@ public class ThriftSchemaConverter {
       case STRING:
         StringType stringType = new StringType();
         FieldMetaData fieldMetaData = field.getFieldMetaData();
+        // There is no real binary type (see THRIFT-1920) in Thrift,
+        // binary data is represented by String type with an additional binary flag.
         if (fieldMetaData != null && fieldMetaData.valueMetaData.isBinary()) {
           stringType.setBinary(true);
         }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/878ebcd0/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestBinary.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestBinary.java b/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestBinary.java
index a152bba..ac5a08b 100644
--- a/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestBinary.java
+++ b/parquet-thrift/src/test/java/org/apache/parquet/hadoop/thrift/TestBinary.java
@@ -42,7 +42,6 @@ import org.apache.parquet.thrift.ThriftParquetWriter;
 import org.apache.parquet.thrift.test.binary.StringAndBinary;
 
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
 
 public class TestBinary {
   @Rule
@@ -81,9 +80,7 @@ public class TestBinary {
   private void assertSchema(ParquetMetadata parquetMetadata) {
     List<Type> fields = parquetMetadata.getFileMetaData().getSchema().getFields();
     assertEquals(2, fields.size());
-    assertEquals(Types.required(PrimitiveType.PrimitiveTypeName.BINARY).id(1).named("s"), fields.get(0));
-    assertEquals(OriginalType.UTF8, fields.get(0).getOriginalType());
+    assertEquals(Types.required(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8).id(1).named("s"), fields.get(0));
     assertEquals(Types.required(PrimitiveType.PrimitiveTypeName.BINARY).id(2).named("b"), fields.get(1));
-    assertNull(fields.get(1).getOriginalType());
   }
 }