You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2013/02/14 21:47:49 UTC
svn commit: r1446320 - in /avro/trunk: ./ lang/java/trevni/core/
lang/java/trevni/core/src/main/java/org/apache/trevni/
lang/java/trevni/core/src/test/java/org/apache/trevni/
Author: cutting
Date: Thu Feb 14 20:47:49 2013
New Revision: 1446320
URL: http://svn.apache.org/r1446320
Log:
AVRO-1253. Java: Add support for bzip2 file compression to Trevni. Contributed by Ted Malaska.
Added:
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BZip2Codec.java (with props)
avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestBZip2Codec.java (with props)
Modified:
avro/trunk/CHANGES.txt
avro/trunk/lang/java/trevni/core/pom.xml
avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java
Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1446320&r1=1446319&r2=1446320&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Thu Feb 14 20:47:49 2013
@@ -15,6 +15,9 @@ Trunk (not yet released)
AVRO-1243. Java: Add support for bzip2 file compression and
translate Hadoop job compression options. (Ted Malaska via cutting)
+ AVRO-1253. Java: Add support for bzip2 file compression to Trevni.
+ (Ted Malaska via cutting)
+
IMPROVEMENTS
AVRO-1211. Add MR guide to documentation. (Skye Wanderman-Milne via
Modified: avro/trunk/lang/java/trevni/core/pom.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/pom.xml?rev=1446320&r1=1446319&r2=1446320&view=diff
==============================================================================
--- avro/trunk/lang/java/trevni/core/pom.xml (original)
+++ avro/trunk/lang/java/trevni/core/pom.xml Thu Feb 14 20:47:49 2013
@@ -43,6 +43,10 @@
<artifactId>snappy-java</artifactId>
<version>${snappy.version}</version>
<scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
</dependency>
</dependencies>
Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BZip2Codec.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BZip2Codec.java?rev=1446320&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BZip2Codec.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BZip2Codec.java Thu Feb 14 20:47:49 2013
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
+
+/**
+ * Codec that compresses and decompresses byte buffers with bzip2, using the
+ * Apache Commons Compress stream implementations.
+ */
+public class BZip2Codec extends Codec {
+
+  /** Size in bytes of the scratch buffer used while decompressing. */
+  public static final int DEFAULT_BUFFER_SIZE = 64 * 1024;
+
+  /**
+   * Output buffer reused across {@link #compress(ByteBuffer)} calls so it is
+   * allocated only once per codec instance. Access is not synchronized.
+   */
+  private ByteArrayOutputStream outputBuffer;
+
+  /**
+   * Compresses the readable region of a buffer (from its position up to its
+   * limit).
+   *
+   * @param uncompressedData array-backed buffer holding the bytes to compress;
+   *          its position and limit are left unchanged
+   * @return a new buffer wrapping a copy of the compressed bytes
+   * @throws IOException if the bzip2 stream cannot be written
+   */
+  @Override
+  ByteBuffer compress(ByteBuffer uncompressedData) throws IOException {
+    ByteArrayOutputStream baos = getOutputBuffer(uncompressedData.remaining());
+    BZip2CompressorOutputStream outputStream = new BZip2CompressorOutputStream(baos);
+
+    try {
+      // Write only the readable region. The backing array may be larger than
+      // that region (a slice, or a buffer that was partially consumed), so the
+      // array offset, position and remaining count all have to be honored.
+      outputStream.write(uncompressedData.array(),
+          uncompressedData.arrayOffset() + uncompressedData.position(),
+          uncompressedData.remaining());
+    } finally {
+      // The stream must be closed before baos is read so that the compressed
+      // stream is complete.
+      outputStream.close();
+    }
+
+    return ByteBuffer.wrap(baos.toByteArray());
+  }
+
+  /**
+   * Decompresses the readable region of a buffer (from its position up to its
+   * limit).
+   *
+   * @param compressedData array-backed buffer holding bzip2 data; its
+   *          position and limit are left unchanged
+   * @return a new buffer wrapping the decompressed bytes
+   * @throws IOException if the data cannot be read as a bzip2 stream
+   */
+  @Override
+  ByteBuffer decompress(ByteBuffer compressedData) throws IOException {
+    // Expose only the readable region of the backing array to the decoder.
+    ByteArrayInputStream bais = new ByteArrayInputStream(compressedData.array(),
+        compressedData.arrayOffset() + compressedData.position(),
+        compressedData.remaining());
+    BZip2CompressorInputStream inputStream = new BZip2CompressorInputStream(bais);
+    try {
+      ByteArrayOutputStream baos = new ByteArrayOutputStream();
+      byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
+      int readCount;
+
+      // Always fill the scratch buffer from index 0: its offset is unrelated
+      // to the position of the compressed input, and baos.write() below
+      // copies from index 0 as well.
+      while ((readCount = inputStream.read(buffer, 0, buffer.length)) > 0) {
+        baos.write(buffer, 0, readCount);
+      }
+
+      return ByteBuffer.wrap(baos.toByteArray());
+    } finally {
+      inputStream.close();
+    }
+  }
+
+  /**
+   * Returns the reusable output buffer, creating it on first use and emptying
+   * it on every call.
+   *
+   * @param suggestedLength initial capacity used only when the buffer is first
+   *          created
+   * @return the emptied output buffer
+   */
+  private ByteArrayOutputStream getOutputBuffer(int suggestedLength) {
+    if (null == outputBuffer) {
+      outputBuffer = new ByteArrayOutputStream(suggestedLength);
+    }
+    outputBuffer.reset();
+    return outputBuffer;
+  }
+
+}
Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BZip2Codec.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java?rev=1446320&r1=1446319&r2=1446320&view=diff
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java (original)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java Thu Feb 14 20:47:49 2013
@@ -31,6 +31,8 @@ abstract class Codec {
return new DeflateCodec();
else if ("snappy".equals(name))
return new SnappyCodec();
+ else if ("bzip2".equals(name))
+ return new BZip2Codec();
else
throw new TrevniRuntimeException("Unknown codec: "+name);
}
Added: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestBZip2Codec.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestBZip2Codec.java?rev=1446320&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestBZip2Codec.java (added)
+++ avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestBZip2Codec.java Thu Feb 14 20:47:49 2013
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.junit.Test;
+
+/** Round-trip test for the codec registered under the name "bzip2". */
+public class TestBZip2Codec {
+
+  /**
+   * Compresses a repetitive payload with the bzip2 codec, decompresses the
+   * result, and verifies that the output matches the input byte for byte.
+   *
+   * @throws IOException if compression or decompression fails
+   */
+  @Test
+  public void testBZip2CompressionAndDecompression() throws IOException {
+
+    MetaData meta = new MetaData();
+    meta.setCodec("bzip2");
+    Codec codec = Codec.get(meta);
+
+    // Confirm that the right codec came back
+    assertTrue(codec instanceof BZip2Codec);
+
+    // This is 3 times the byte buffer used by the BZip2 decompress plus some
+    // extra, so the input does not fit in a single buffer
+    final int inputByteSize = BZip2Codec.DEFAULT_BUFFER_SIZE * 3 + 42;
+
+    byte[] inputByteArray = new byte[inputByteSize];
+
+    // Generate something that will compress well
+    for (int i = 0; i < inputByteSize; i++) {
+      inputByteArray[i] = (byte) (65 + i % 10);
+    }
+
+    ByteBuffer inputByteBuffer = ByteBuffer.wrap(inputByteArray);
+
+    ByteBuffer compressedBuffer = codec.compress(inputByteBuffer);
+
+    // Make sure something was returned
+    assertTrue(compressedBuffer.array().length > 0);
+    // Make sure the compressed output is smaller than the original
+    assertTrue(compressedBuffer.array().length < inputByteArray.length);
+
+    ByteBuffer decompressedBuffer = codec.decompress(compressedBuffer);
+
+    // The decompressed array should be the same length as the original array
+    byte[] outputByteArray = decompressedBuffer.array();
+    assertTrue(outputByteArray.length == inputByteArray.length);
+
+    // Every byte in the output array must equal the corresponding input byte
+    for (int i = 0; i < inputByteSize; i++) {
+      assertTrue("Mismatch at byte " + i, inputByteArray[i] == outputByteArray[i]);
+    }
+  }
+}
Propchange: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestBZip2Codec.java
------------------------------------------------------------------------------
svn:eol-style = native