You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2013/02/14 21:47:49 UTC

svn commit: r1446320 - in /avro/trunk: ./ lang/java/trevni/core/ lang/java/trevni/core/src/main/java/org/apache/trevni/ lang/java/trevni/core/src/test/java/org/apache/trevni/

Author: cutting
Date: Thu Feb 14 20:47:49 2013
New Revision: 1446320

URL: http://svn.apache.org/r1446320
Log:
AVRO-1253. Java: Add support for bzip2 file compression to Trevni.  Contributed by Ted Malaska.

Added:
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BZip2Codec.java   (with props)
    avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestBZip2Codec.java   (with props)
Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/java/trevni/core/pom.xml
    avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1446320&r1=1446319&r2=1446320&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Thu Feb 14 20:47:49 2013
@@ -15,6 +15,9 @@ Trunk (not yet released)
     AVRO-1243. Java: Add support for bzip2 file compression and
     translate Hadoop job compression options. (Ted Malaska via cutting)
 
+    AVRO-1253. Java: Add support for bzip2 file compression to Trevni.
+    (Ted Malaska via cutting)
+
   IMPROVEMENTS
 
     AVRO-1211. Add MR guide to documentation. (Skye Wanderman-Milne via

Modified: avro/trunk/lang/java/trevni/core/pom.xml
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/pom.xml?rev=1446320&r1=1446319&r2=1446320&view=diff
==============================================================================
--- avro/trunk/lang/java/trevni/core/pom.xml (original)
+++ avro/trunk/lang/java/trevni/core/pom.xml Thu Feb 14 20:47:49 2013
@@ -43,6 +43,10 @@
       <artifactId>snappy-java</artifactId>
       <version>${snappy.version}</version>
       <scope>compile</scope>
+    </dependency>    
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-compress</artifactId>
     </dependency>
   </dependencies>
 

Added: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BZip2Codec.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BZip2Codec.java?rev=1446320&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BZip2Codec.java (added)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BZip2Codec.java Thu Feb 14 20:47:49 2013
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
+
+/** bzip2 {@link Codec} built on Commons Compress; arguments must be array-backed buffers. */
+public class BZip2Codec extends Codec {
+  /** Size in bytes of the scratch buffer used to drain the decompressor. */
+  public static final int DEFAULT_BUFFER_SIZE = 64 * 1024;
+  /** Created on the first compress() call, then reset and reused by later calls. */
+  private ByteArrayOutputStream outputBuffer;
+
+  /** Compresses the bytes between position and limit; returns a buffer wrapping a fresh copy. */
+  @Override
+  ByteBuffer compress(ByteBuffer uncompressedData) throws IOException {
+    ByteArrayOutputStream baos = getOutputBuffer(uncompressedData.remaining());
+    BZip2CompressorOutputStream outputStream = new BZip2CompressorOutputStream(baos);
+    try {
+      // Write only the readable window; array() alone also covers bytes outside it.
+      int start = uncompressedData.arrayOffset() + uncompressedData.position();
+      outputStream.write(uncompressedData.array(), start, uncompressedData.remaining());
+    } finally {
+      outputStream.close();
+    }
+    return ByteBuffer.wrap(baos.toByteArray());
+  }
+
+  /** Decompresses the bzip2 stream between position and limit into a newly allocated buffer. */
+  @Override
+  ByteBuffer decompress(ByteBuffer compressedData) throws IOException {
+    int start = compressedData.arrayOffset() + compressedData.position();
+    BZip2CompressorInputStream inputStream = new BZip2CompressorInputStream(
+        new ByteArrayInputStream(compressedData.array(), start, compressedData.remaining()));
+    try {
+      ByteArrayOutputStream baos = new ByteArrayOutputStream();
+      byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
+      int readCount;
+      // The read offset indexes the scratch buffer, not the input, so it is always 0.
+      while ((readCount = inputStream.read(buffer, 0, buffer.length)) > 0) {
+        baos.write(buffer, 0, readCount);
+      }
+      return ByteBuffer.wrap(baos.toByteArray());
+    } finally {
+      inputStream.close();
+    }
+  }
+
+  /** Returns the reusable output buffer, emptied; allocates it on first use. */
+  private ByteArrayOutputStream getOutputBuffer(int suggestedLength) {
+    if (null == outputBuffer)
+      outputBuffer = new ByteArrayOutputStream(suggestedLength);
+    outputBuffer.reset();
+    return outputBuffer;
+  }
+}

Propchange: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/BZip2Codec.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java?rev=1446320&r1=1446319&r2=1446320&view=diff
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java (original)
+++ avro/trunk/lang/java/trevni/core/src/main/java/org/apache/trevni/Codec.java Thu Feb 14 20:47:49 2013
@@ -31,6 +31,8 @@ abstract class Codec {
       return new DeflateCodec();
     else if ("snappy".equals(name))
       return new SnappyCodec();
+    else if ("bzip2".equals(name))
+      return new BZip2Codec();
     else
       throw new TrevniRuntimeException("Unknown codec: "+name);
   }

Added: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestBZip2Codec.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestBZip2Codec.java?rev=1446320&view=auto
==============================================================================
--- avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestBZip2Codec.java (added)
+++ avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestBZip2Codec.java Thu Feb 14 20:47:49 2013
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.trevni;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.junit.Test;
+
+/** Round-trip test for the codec that Codec.get returns for the name "bzip2". */
+public class TestBZip2Codec {
+
+  /** Compresses a repetitive payload, decompresses it, and checks the bytes survive. */
+  @Test
+  public void testBZip2CompressionAndDecompression() throws IOException {
+    MetaData meta = new MetaData();
+    meta.setCodec("bzip2");
+    Codec codec = Codec.get(meta);
+
+    // Confirm that the right codec came back
+    assertTrue(codec instanceof BZip2Codec);
+
+    // This is 3 times the byte buffer on the BZip2 decompress plus some extra
+    final int inputByteSize = BZip2Codec.DEFAULT_BUFFER_SIZE * 3 + 42;
+    byte[] inputByteArray = new byte[inputByteSize];
+
+    // Generate something that will compress well
+    for (int i = 0; i < inputByteSize; i++) {
+      inputByteArray[i] = (byte)(65 + i % 10);
+    }
+
+    ByteBuffer inputByteBuffer = ByteBuffer.wrap(inputByteArray);
+    ByteBuffer compressedBuffer = codec.compress(inputByteBuffer);
+
+    // Make sure something returned
+    assertTrue(compressedBuffer.array().length > 0);
+    // Make sure the compressed output is smaller than the original
+    assertTrue(compressedBuffer.array().length < inputByteArray.length);
+
+    ByteBuffer decompressedBuffer = codec.decompress(compressedBuffer);
+
+    // The original array should be the same length as the decompressed array
+    assertTrue(decompressedBuffer.array().length == inputByteArray.length);
+
+    // Every byte in the output array must equal the matching input byte.
+    // Compare rather than assign: an assignment here would verify nothing.
+    byte[] outputByteArray = decompressedBuffer.array();
+    for (int i = 0; i < inputByteSize; i++) {
+      assertTrue(inputByteArray[i] == outputByteArray[i]);
+    }
+  }
+}

Propchange: avro/trunk/lang/java/trevni/core/src/test/java/org/apache/trevni/TestBZip2Codec.java
------------------------------------------------------------------------------
    svn:eol-style = native