You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by bl...@apache.org on 2015/03/11 23:11:31 UTC
incubator-parquet-mr git commit: PARQUET-172: Add parquet-thrift binary tests.
Repository: incubator-parquet-mr
Updated Branches:
refs/heads/master 5acc6a550 -> 031a762d1
PARQUET-172: Add parquet-thrift binary tests.
These tests validate that there is no encoding problem with parquet-thrift or parquet-scrooge. See https://github.com/laurencer/parquet-mr-bug
Author: Ryan Blue <bl...@apache.org>
Closes #145 from rdblue/PARQUET-172-add-thrift-binary-test and squashes the following commits:
6856414 [Ryan Blue] PARQUET-172: Add parquet-thrift binary tests.
Project: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/commit/031a762d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/tree/031a762d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/diff/031a762d
Branch: refs/heads/master
Commit: 031a762d105bceda2049204ba54b8f8737f359b4
Parents: 5acc6a5
Author: Ryan Blue <bl...@apache.org>
Authored: Wed Mar 11 15:11:16 2015 -0700
Committer: Ryan Blue <bl...@apache.org>
Committed: Wed Mar 11 15:11:16 2015 -0700
----------------------------------------------------------------------
.../java/parquet/scrooge/ScroogeBinaryTest.java | 100 +++++++++++++++++++
.../scrooge/ScroogeStructConverterTest.java | 7 ++
parquet-scrooge/src/test/thrift/test.thrift | 4 +
.../java/parquet/hadoop/thrift/TestBinary.java | 66 ++++++++++++
parquet-thrift/src/test/thrift/binary.thrift | 25 +++++
5 files changed, 202 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/031a762d/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeBinaryTest.java
----------------------------------------------------------------------
diff --git a/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeBinaryTest.java b/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeBinaryTest.java
new file mode 100644
index 0000000..19bf68c
--- /dev/null
+++ b/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeBinaryTest.java
@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package parquet.scrooge;
+
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.util.UUID;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import parquet.hadoop.ParquetReader;
+import parquet.hadoop.ParquetWriter;
+import parquet.scrooge.test.StringAndBinary;
+import parquet.thrift.ThriftParquetReader;
+
+public class ScroogeBinaryTest {
+ @Rule
+ public TemporaryFolder tempDir = new TemporaryFolder();
+
+ @Test
+ public void testScroogeBinaryEncoding() throws Exception {
+ StringAndBinary expected = new StringAndBinary.Immutable("test",
+ ByteBuffer.wrap(new byte[] {-123, 20, 33}));
+
+ File temp = tempDir.newFile(UUID.randomUUID().toString());
+ temp.deleteOnExit();
+ temp.delete();
+
+ Path path = new Path(temp.getPath());
+
+ ParquetWriter<StringAndBinary> writer = new ParquetWriter<StringAndBinary>(
+ path, new Configuration(), new ScroogeWriteSupport<StringAndBinary>(StringAndBinary.class));
+ writer.write(expected);
+ writer.close();
+
+ // read using the parquet-thrift version to isolate the write path
+ ParquetReader<parquet.thrift.test.binary.StringAndBinary> reader = ThriftParquetReader.<parquet.thrift.test.binary.StringAndBinary>
+ build(path)
+ .withThriftClass(parquet.thrift.test.binary.StringAndBinary.class)
+ .build();
+ parquet.thrift.test.binary.StringAndBinary record = reader.read();
+ reader.close();
+
+ Assert.assertEquals("String should match after serialization round trip",
+ "test", record.s);
+ Assert.assertEquals("ByteBuffer should match after serialization round trip",
+ ByteBuffer.wrap(new byte[] {-123, 20, 33}), record.b);
+ }
+
+ @Test
+ @SuppressWarnings("unchecked")
+ public void testScroogeBinaryDecoding() throws Exception {
+ StringAndBinary expected = new StringAndBinary.Immutable("test",
+ ByteBuffer.wrap(new byte[] {-123, 20, 33}));
+
+ File temp = tempDir.newFile(UUID.randomUUID().toString());
+ temp.deleteOnExit();
+ temp.delete();
+
+ Path path = new Path(temp.getPath());
+
+ ParquetWriter<StringAndBinary> writer = new ParquetWriter<StringAndBinary>(
+ path, new Configuration(), new ScroogeWriteSupport<StringAndBinary>(StringAndBinary.class));
+ writer.write(expected);
+ writer.close();
+
+ Configuration conf = new Configuration();
+ conf.set("parquet.thrift.converter.class", ScroogeRecordConverter.class.getName());
+ ParquetReader<StringAndBinary> reader = ParquetReader.<StringAndBinary>
+ builder(new ScroogeReadSupport(), path)
+ .withConf(conf)
+ .build();
+ StringAndBinary record = reader.read();
+ reader.close();
+
+ Assert.assertEquals("String should match after serialization round trip",
+ "test", record.s());
+ Assert.assertEquals("ByteBuffer should match after serialization round trip",
+ ByteBuffer.wrap(new byte[] {-123, 20, 33}), record.b());
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/031a762d/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeStructConverterTest.java
----------------------------------------------------------------------
diff --git a/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeStructConverterTest.java b/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeStructConverterTest.java
index 7431c10..3dc5369 100644
--- a/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeStructConverterTest.java
+++ b/parquet-scrooge/src/test/java/parquet/scrooge/ScroogeStructConverterTest.java
@@ -30,6 +30,7 @@ import parquet.scrooge.test.TestOptionalMap;
import parquet.scrooge.test.TestPersonWithAllInformation;
import parquet.scrooge.test.TestSetPrimitive;
import parquet.scrooge.test.TestUnion;
+import parquet.scrooge.test.StringAndBinary;
import parquet.thrift.ThriftSchemaConverter;
import parquet.thrift.struct.ThriftType;
import static org.junit.Assert.assertEquals;
@@ -43,7 +44,13 @@ public class ScroogeStructConverterTest {
ThriftType.StructType scroogeMap = new ScroogeStructConverter().convert(TestMapPrimitiveKey.class);
ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(parquet.thrift.test.TestMapPrimitiveKey.class);
assertEquals(expected,scroogeMap);
+ }
+ @Test
+ public void testBinary() throws Exception {
+ ThriftType.StructType scroogeBinary = new ScroogeStructConverter().convert(StringAndBinary.class);
+ ThriftType.StructType expected = new ThriftSchemaConverter().toStructType(parquet.thrift.test.StringAndBinary.class);
+ assertEquals(expected, scroogeBinary);
}
@Test
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/031a762d/parquet-scrooge/src/test/thrift/test.thrift
----------------------------------------------------------------------
diff --git a/parquet-scrooge/src/test/thrift/test.thrift b/parquet-scrooge/src/test/thrift/test.thrift
index 11c598f..a80bbb0 100644
--- a/parquet-scrooge/src/test/thrift/test.thrift
+++ b/parquet-scrooge/src/test/thrift/test.thrift
@@ -168,3 +168,7 @@ struct TestFieldOfEnum{
2: optional Operation op2
}
+struct StringAndBinary {
+ 1: required string s;
+ 2: required binary b;
+}
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/031a762d/parquet-thrift/src/test/java/parquet/hadoop/thrift/TestBinary.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/java/parquet/hadoop/thrift/TestBinary.java b/parquet-thrift/src/test/java/parquet/hadoop/thrift/TestBinary.java
new file mode 100644
index 0000000..36fb7e6
--- /dev/null
+++ b/parquet-thrift/src/test/java/parquet/hadoop/thrift/TestBinary.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package parquet.hadoop.thrift;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.UUID;
+import org.apache.hadoop.fs.Path;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import parquet.hadoop.ParquetReader;
+import parquet.hadoop.metadata.CompressionCodecName;
+import parquet.thrift.ThriftParquetReader;
+import parquet.thrift.ThriftParquetWriter;
+import parquet.thrift.test.binary.StringAndBinary;
+
+public class TestBinary {
+ @Rule
+ public TemporaryFolder tempDir = new TemporaryFolder();
+
+ @Test
+ public void testBinary() throws IOException {
+ StringAndBinary expected = new StringAndBinary("test",
+ ByteBuffer.wrap(new byte[] { -123, 20, 33 }));
+ File temp = tempDir.newFile(UUID.randomUUID().toString());
+ temp.deleteOnExit();
+ temp.delete();
+
+ Path path = new Path(temp.getPath());
+
+ ThriftParquetWriter<StringAndBinary> writer =
+ new ThriftParquetWriter<StringAndBinary>(
+ path, StringAndBinary.class, CompressionCodecName.SNAPPY);
+ writer.write(expected);
+ writer.close();
+
+ ParquetReader<StringAndBinary> reader = ThriftParquetReader.<StringAndBinary>
+ build(path)
+ .withThriftClass(StringAndBinary.class)
+ .build();
+ StringAndBinary record = reader.read();
+ reader.close();
+
+ Assert.assertEquals("Should match after serialization round trip",
+ expected, record);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/031a762d/parquet-thrift/src/test/thrift/binary.thrift
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/thrift/binary.thrift b/parquet-thrift/src/test/thrift/binary.thrift
new file mode 100644
index 0000000..fa80424
--- /dev/null
+++ b/parquet-thrift/src/test/thrift/binary.thrift
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+namespace java parquet.thrift.test.binary
+
+struct StringAndBinary {
+ 1: required string s;
+ 2: required binary b;
+}