You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by bl...@apache.org on 2015/03/11 23:11:31 UTC

incubator-parquet-mr git commit: PARQUET-172: Add parquet-thrift binary tests.

Repository: incubator-parquet-mr
Updated Branches:
  refs/heads/master 5acc6a550 -> 031a762d1


PARQUET-172: Add parquet-thrift binary tests.

These tests validate that there is no encoding problem with parquet-thrift or parquet-scrooge. See https://github.com/laurencer/parquet-mr-bug

Author: Ryan Blue <bl...@apache.org>

Closes #145 from rdblue/PARQUET-172-add-thrift-binary-test and squashes the following commits:

6856414 [Ryan Blue] PARQUET-172: Add parquet-thrift binary tests.


Project: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/commit/031a762d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/tree/031a762d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/diff/031a762d

Branch: refs/heads/master
Commit: 031a762d105bceda2049204ba54b8f8737f359b4
Parents: 5acc6a5
Author: Ryan Blue <bl...@apache.org>
Authored: Wed Mar 11 15:11:16 2015 -0700
Committer: Ryan Blue <bl...@apache.org>
Committed: Wed Mar 11 15:11:16 2015 -0700

----------------------------------------------------------------------
 .../java/parquet/scrooge/ScroogeBinaryTest.java | 100 +++++++++++++++++++
 .../scrooge/ScroogeStructConverterTest.java     |   7 ++
 parquet-scrooge/src/test/thrift/test.thrift     |   4 +
 .../java/parquet/hadoop/thrift/TestBinary.java  |  66 ++++++++++++
 parquet-thrift/src/test/thrift/binary.thrift    |  25 +++++
 5 files changed, 202 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/031a762d/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeBinaryTest.java
----------------------------------------------------------------------
diff --git a/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeBinaryTest.java b/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeBinaryTest.java
new file mode 100644
index 0000000..19bf68c
--- /dev/null
+++ b/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeBinaryTest.java
@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package parquet.scrooge;
+
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.util.UUID;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import parquet.hadoop.ParquetReader;
+import parquet.hadoop.ParquetWriter;
+import parquet.scrooge.test.StringAndBinary;
+import parquet.thrift.ThriftParquetReader;
+
+public class ScroogeBinaryTest { // round-trip tests for a string + binary record written through ScroogeWriteSupport
+  @Rule
+  public TemporaryFolder tempDir = new TemporaryFolder(); // JUnit rule; each test asks it for a fresh file below
+
+  @Test
+  public void testScroogeBinaryEncoding() throws Exception { // writes with the Scrooge write support, reads back with ThriftParquetReader
+    StringAndBinary expected = new StringAndBinary.Immutable("test",
+        ByteBuffer.wrap(new byte[] {-123, 20, 33})); // -123 is 0x85 as an unsigned byte, i.e. outside the ASCII range
+
+    File temp = tempDir.newFile(UUID.randomUUID().toString());
+    temp.deleteOnExit();
+    temp.delete(); // only the unique path is kept; presumably the writer must create the file itself -- TODO confirm
+
+    Path path = new Path(temp.getPath());
+
+    ParquetWriter<StringAndBinary> writer = new ParquetWriter<StringAndBinary>(
+        path, new Configuration(), new ScroogeWriteSupport<StringAndBinary>(StringAndBinary.class));
+    writer.write(expected);
+    writer.close(); // NOTE(review): not reached if write() throws; there is no try/finally around the writer
+
+    // read using the parquet-thrift version to isolate the write path
+    ParquetReader<parquet.thrift.test.binary.StringAndBinary> reader = ThriftParquetReader.<parquet.thrift.test.binary.StringAndBinary>
+        build(path)
+        .withThriftClass(parquet.thrift.test.binary.StringAndBinary.class)
+        .build();
+    parquet.thrift.test.binary.StringAndBinary record = reader.read(); // only the first record is read; exactly one was written above
+    reader.close(); // NOTE(review): not reached if read() throws
+
+    Assert.assertEquals("String should match after serialization round trip",
+        "test", record.s); // the parquet-thrift class is accessed through its fields (s, b)
+    Assert.assertEquals("ByteBuffer should match after serialization round trip",
+        ByteBuffer.wrap(new byte[] {-123, 20, 33}), record.b); // compared against a freshly wrapped copy of the same three bytes
+  }
+
+  @Test
+  @SuppressWarnings("unchecked") // NOTE(review): presumably for the raw ScroogeReadSupport handed to the builder below -- confirm
+  public void testScroogeBinaryDecoding() throws Exception { // writes and reads with the Scrooge classes on both sides
+    StringAndBinary expected = new StringAndBinary.Immutable("test",
+        ByteBuffer.wrap(new byte[] {-123, 20, 33})); // same payload as the encoding test
+
+    File temp = tempDir.newFile(UUID.randomUUID().toString());
+    temp.deleteOnExit();
+    temp.delete(); // only the unique path is kept; presumably the writer must create the file itself -- TODO confirm
+
+    Path path = new Path(temp.getPath());
+
+    ParquetWriter<StringAndBinary> writer = new ParquetWriter<StringAndBinary>(
+        path, new Configuration(), new ScroogeWriteSupport<StringAndBinary>(StringAndBinary.class));
+    writer.write(expected);
+    writer.close(); // NOTE(review): not reached if write() throws; there is no try/finally around the writer
+
+    Configuration conf = new Configuration();
+    conf.set("parquet.thrift.converter.class", ScroogeRecordConverter.class.getName()); // presumably makes the read path build Scrooge records -- confirm against ScroogeReadSupport
+    ParquetReader<StringAndBinary> reader = ParquetReader.<StringAndBinary>
+        builder(new ScroogeReadSupport(), path)
+        .withConf(conf)
+        .build();
+    StringAndBinary record = reader.read(); // only the first record is read; exactly one was written above
+    reader.close(); // NOTE(review): not reached if read() throws
+
+    Assert.assertEquals("String should match after serialization round trip",
+        "test", record.s()); // the Scrooge class is accessed through accessor methods s() and b()
+    Assert.assertEquals("ByteBuffer should match after serialization round trip",
+        ByteBuffer.wrap(new byte[] {-123, 20, 33}), record.b()); // compared against a freshly wrapped copy of the same three bytes
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/031a762d/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeStructConverterTest.java
----------------------------------------------------------------------
diff --git a/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeStructConverterTest.java b/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeStructConverterTest.java
index 7431c10..3dc5369 100644
--- a/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeStructConverterTest.java
+++ b/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeStructConverterTest.java
@@ -30,6 +30,7 @@ import parquet.scrooge.test.TestOptionalMap;
 import parquet.scrooge.test.TestPersonWithAllInformation;
 import parquet.scrooge.test.TestSetPrimitive;
 import parquet.scrooge.test.TestUnion;
+import parquet.scrooge.test.StringAndBinary;
 import parquet.thrift.ThriftSchemaConverter;
 import parquet.thrift.struct.ThriftType;
 import static org.junit.Assert.assertEquals;
@@ -43,7 +44,13 @@ public class ScroogeStructConverterTest {
     ThriftType.StructType scroogeMap = new ScroogeStructConverter().convert(TestMapPrimitiveKey.class);
     ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(parquet.thrift.test.TestMapPrimitiveKey.class);
     assertEquals(expected,scroogeMap);
+  }
 
+  @Test
+  public void testBinary() throws Exception { // schema-level check: no Parquet file is written or read here
+    ThriftType.StructType scroogeBinary = new ScroogeStructConverter().convert(StringAndBinary.class); // struct type derived from the Scrooge class
+    ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(parquet.thrift.test.StringAndBinary.class); // struct type derived from the class of the same name in parquet.thrift.test
+    assertEquals(expected, scroogeBinary); // both converters must produce equal struct types
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/031a762d/parquet-scrooge/src/test/thrift/test.thrift
----------------------------------------------------------------------
diff --git a/parquet-scrooge/src/test/thrift/test.thrift b/parquet-scrooge/src/test/thrift/test.thrift
index 11c598f..a80bbb0 100644
--- a/parquet-scrooge/src/test/thrift/test.thrift
+++ b/parquet-scrooge/src/test/thrift/test.thrift
@@ -168,3 +168,7 @@ struct TestFieldOfEnum{
  2: optional Operation op2
 }
 
+struct StringAndBinary { // fixture added with the binary tests in this commit; presumably the source of parquet.scrooge.test.StringAndBinary -- confirm against this file's namespace lines
+  1: required string s; // text field
+  2: required binary b; // raw-bytes field
+}

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/031a762d/parquet-thrift/src/test/java/parquet/hadoop/thrift/TestBinary.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/java/parquet/hadoop/thrift/TestBinary.java b/parquet-thrift/src/test/java/parquet/hadoop/thrift/TestBinary.java
new file mode 100644
index 0000000..36fb7e6
--- /dev/null
+++ b/parquet-thrift/src/test/java/parquet/hadoop/thrift/TestBinary.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package parquet.hadoop.thrift;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.UUID;
+import org.apache.hadoop.fs.Path;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import parquet.hadoop.ParquetReader;
+import parquet.hadoop.metadata.CompressionCodecName;
+import parquet.thrift.ThriftParquetReader;
+import parquet.thrift.ThriftParquetWriter;
+import parquet.thrift.test.binary.StringAndBinary;
+
+public class TestBinary { // round-trip test for a string + binary record using the parquet-thrift writer and reader
+  @Rule
+  public TemporaryFolder tempDir = new TemporaryFolder(); // JUnit rule; the test asks it for a fresh file below
+
+  @Test
+  public void testBinary() throws IOException { // write one record, read it back, compare whole records
+    StringAndBinary expected = new StringAndBinary("test",
+        ByteBuffer.wrap(new byte[] { -123, 20, 33 })); // -123 is 0x85 as an unsigned byte, i.e. outside the ASCII range
+    File temp = tempDir.newFile(UUID.randomUUID().toString());
+    temp.deleteOnExit();
+    temp.delete(); // only the unique path is kept; presumably the writer must create the file itself -- TODO confirm
+
+    Path path = new Path(temp.getPath());
+
+    ThriftParquetWriter<StringAndBinary> writer =
+        new ThriftParquetWriter<StringAndBinary>(
+            path, StringAndBinary.class, CompressionCodecName.SNAPPY); // file is written with the SNAPPY codec
+    writer.write(expected);
+    writer.close(); // NOTE(review): not reached if write() throws; there is no try/finally around the writer
+
+    ParquetReader<StringAndBinary> reader = ThriftParquetReader.<StringAndBinary>
+        build(path)
+        .withThriftClass(StringAndBinary.class)
+        .build();
+    StringAndBinary record = reader.read(); // only the first record is read; exactly one was written above
+    reader.close(); // NOTE(review): not reached if read() throws
+
+    Assert.assertEquals("Should match after serialization round trip",
+        expected, record); // relies on the generated class's equals(); assumed to compare both fields -- confirm
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/031a762d/parquet-thrift/src/test/thrift/binary.thrift
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/thrift/binary.thrift b/parquet-thrift/src/test/thrift/binary.thrift
new file mode 100644
index 0000000..fa80424
--- /dev/null
+++ b/parquet-thrift/src/test/thrift/binary.thrift
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+namespace java parquet.thrift.test.binary
+
+struct StringAndBinary { // generated into Java package parquet.thrift.test.binary per the namespace line above
+  1: required string s; // text field
+  2: required binary b; // raw-bytes field
+}