You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by to...@apache.org on 2011/06/07 20:31:27 UTC

svn commit: r1133125 - in /hadoop/common/trunk: ./ src/java/ src/java/org/apache/hadoop/io/compress/ src/test/core/org/apache/hadoop/io/compress/

Author: tomwhite
Date: Tue Jun  7 18:31:26 2011
New Revision: 1133125

URL: http://svn.apache.org/viewvc?rev=1133125&view=rev
Log:
HADOOP-7323. Add capability to resolve compression codec based on codec name. Contributed by Alejandro Abdelnur.

Added:
    hadoop/common/trunk/src/java/org/apache/hadoop/io/compress/DeflateCodec.java   (with props)
Modified:
    hadoop/common/trunk/CHANGES.txt
    hadoop/common/trunk/src/java/core-default.xml
    hadoop/common/trunk/src/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java
    hadoop/common/trunk/src/test/core/org/apache/hadoop/io/compress/TestCodec.java
    hadoop/common/trunk/src/test/core/org/apache/hadoop/io/compress/TestCodecFactory.java

Modified: hadoop/common/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/CHANGES.txt?rev=1133125&r1=1133124&r2=1133125&view=diff
==============================================================================
--- hadoop/common/trunk/CHANGES.txt (original)
+++ hadoop/common/trunk/CHANGES.txt Tue Jun  7 18:31:26 2011
@@ -199,6 +199,9 @@ Trunk (unreleased changes)
     HADOOP-7316. Add public javadocs to FSDataInputStream and
     FSDataOutputStream. (eli)
 
+    HADOOP-7323. Add capability to resolve compression codec based on codec
+    name. (Alejandro Abdelnur via tomwhite)
+
   OPTIMIZATIONS
   
     HADOOP-7333. Performance improvement in PureJavaCrc32. (Eric Caspole

Modified: hadoop/common/trunk/src/java/core-default.xml
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/core-default.xml?rev=1133125&r1=1133124&r2=1133125&view=diff
==============================================================================
--- hadoop/common/trunk/src/java/core-default.xml (original)
+++ hadoop/common/trunk/src/java/core-default.xml Tue Jun  7 18:31:26 2011
@@ -174,7 +174,7 @@
 
 <property>
   <name>io.compression.codecs</name>
-  <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value>
+  <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec</value>
   <description>A list of the compression codec classes that can be used 
                for compression/decompression.</description>
 </property>

Modified: hadoop/common/trunk/src/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java?rev=1133125&r1=1133124&r2=1133125&view=diff
==============================================================================
--- hadoop/common/trunk/src/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java (original)
+++ hadoop/common/trunk/src/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java Tue Jun  7 18:31:26 2011
@@ -43,7 +43,14 @@ public class CompressionCodecFactory {
    * automatically supports finding the longest matching suffix. 
    */
   private SortedMap<String, CompressionCodec> codecs = null;
-  
+
+    /**
+     * A map from the lower-cased codec aliases to the codecs.
+     * Each codec is registered under its simple class name and, when that
+     * name ends with 'Codec', also under the name without that ending.
+     */
+    private Map<String, CompressionCodec> codecsByName = null;
+
   /**
    * A map from class names to the codecs
    */
@@ -53,8 +60,15 @@ public class CompressionCodecFactory {
     String suffix = codec.getDefaultExtension();
     codecs.put(new StringBuilder(suffix).reverse().toString(), codec);
     codecsByClassName.put(codec.getClass().getCanonicalName(), codec);
+
+    String codecName = codec.getClass().getSimpleName();
+    codecsByName.put(codecName.toLowerCase(), codec);
+    if (codecName.endsWith("Codec")) {
+      codecName = codecName.substring(0, codecName.length() - "Codec".length());
+      codecsByName.put(codecName.toLowerCase(), codec);
+    }
   }
-  
+
   /**
    * Print the extension map out as a string.
    */
@@ -142,6 +156,7 @@ public class CompressionCodecFactory {
   public CompressionCodecFactory(Configuration conf) {
     codecs = new TreeMap<String, CompressionCodec>();
     codecsByClassName = new HashMap<String, CompressionCodec>();
+    codecsByName = new HashMap<String, CompressionCodec>();
     List<Class<? extends CompressionCodec>> codecClasses = getCodecClasses(conf);
     if (codecClasses == null) {
       addCodec(new GzipCodec());
@@ -190,6 +205,56 @@ public class CompressionCodecFactory {
     return codecsByClassName.get(classname);
   }
 
+    /**
+     * Find the relevant compression codec for the codec's canonical class name
+     * or by codec alias.
+     * <p/>
+     * Codec aliases are case insensitive.
+     * <p/>
+     * The codec alias is the short class name (without the package name).
+     * If the short class name ends with 'Codec', then there are two aliases for
+     * the codec: the complete short class name and the short class name without
+     * the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+     * aliases are 'gzip' and 'gzipcodec'.
+     *
+     * @param codecName the canonical class name or the alias of the codec
+     * @return the codec object, or null if no matching codec is found
+     */
+    public CompressionCodec getCodecByName(String codecName) {
+      if (codecsByClassName == null) {
+        return null;
+      }
+      CompressionCodec codec = getCodecByClassName(codecName);
+      if (codec == null) {
+        // try to get the codec by alias in case an alias was specified instead of a class name
+        codec = codecsByName.get(codecName.toLowerCase());
+      }
+      return codec;
+    }
+
+    /**
+     * Find the relevant compression codec for the codec's canonical class name
+     * or by codec alias and return its implementation class.
+     * <p/>
+     * Codec aliases are case insensitive.
+     * <p/>
+     * The codec alias is the short class name (without the package name).
+     * If the short class name ends with 'Codec', then there are two aliases for
+     * the codec: the complete short class name and the short class name without
+     * the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+     * aliases are 'gzip' and 'gzipcodec'.
+     *
+     * @param codecName the canonical class name or the alias of the codec
+     * @return the codec class, or null if no matching codec is found
+     */
+    public Class<? extends CompressionCodec> getCodecClassByName(String codecName) {
+      CompressionCodec codec = getCodecByName(codecName);
+      if (codec == null) {
+        return null;
+      }
+      return codec.getClass();
+    }
+
   /**
    * Removes a suffix from a filename, if it has it.
    * @param filename the filename to strip

Added: hadoop/common/trunk/src/java/org/apache/hadoop/io/compress/DeflateCodec.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/io/compress/DeflateCodec.java?rev=1133125&view=auto
==============================================================================
--- hadoop/common/trunk/src/java/org/apache/hadoop/io/compress/DeflateCodec.java (added)
+++ hadoop/common/trunk/src/java/org/apache/hadoop/io/compress/DeflateCodec.java Tue Jun  7 18:31:26 2011
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.compress;
+
+/**
+ * Alias class for DefaultCodec to enable codec discovery by 'deflate' name.
+ */
+public class DeflateCodec extends DefaultCodec {
+}

Propchange: hadoop/common/trunk/src/java/org/apache/hadoop/io/compress/DeflateCodec.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: hadoop/common/trunk/src/test/core/org/apache/hadoop/io/compress/TestCodec.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/test/core/org/apache/hadoop/io/compress/TestCodec.java?rev=1133125&r1=1133124&r2=1133125&view=diff
==============================================================================
--- hadoop/common/trunk/src/test/core/org/apache/hadoop/io/compress/TestCodec.java (original)
+++ hadoop/common/trunk/src/test/core/org/apache/hadoop/io/compress/TestCodec.java Tue Jun  7 18:31:26 2011
@@ -98,6 +98,12 @@ public class TestCodec {
   }
 
   @Test
+  public void testDeflateCodec() throws IOException {
+    codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.DeflateCodec");
+    codecTest(conf, seed, count, "org.apache.hadoop.io.compress.DeflateCodec");
+  }
+
+  @Test
   public void testGzipCodecWithParam() throws IOException {
     Configuration conf = new Configuration(this.conf);
     ZlibFactory.setCompressionLevel(conf, CompressionLevel.BEST_COMPRESSION);
@@ -427,6 +433,13 @@ public class TestCodec {
     sequenceFileCodecTest(conf, 200000, "org.apache.hadoop.io.compress.BZip2Codec", 1000000);
   }
 
+  @Test
+  public void testSequenceFileDeflateCodec() throws IOException, ClassNotFoundException,
+      InstantiationException, IllegalAccessException {
+    sequenceFileCodecTest(conf, 100, "org.apache.hadoop.io.compress.DeflateCodec", 100);
+    sequenceFileCodecTest(conf, 200000, "org.apache.hadoop.io.compress.DeflateCodec", 1000000);
+  }
+
   private static void sequenceFileCodecTest(Configuration conf, int lines, 
                                 String codecClass, int blockSize) 
     throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {

Modified: hadoop/common/trunk/src/test/core/org/apache/hadoop/io/compress/TestCodecFactory.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/test/core/org/apache/hadoop/io/compress/TestCodecFactory.java?rev=1133125&r1=1133124&r2=1133125&view=diff
==============================================================================
--- hadoop/common/trunk/src/test/core/org/apache/hadoop/io/compress/TestCodecFactory.java (original)
+++ hadoop/common/trunk/src/test/core/org/apache/hadoop/io/compress/TestCodecFactory.java Tue Jun  7 18:31:26 2011
@@ -131,12 +131,41 @@ public class TestCodecFactory extends Te
     checkCodec("default factory for .gz", GzipCodec.class, codec);
     codec = factory.getCodecByClassName(GzipCodec.class.getCanonicalName());
     checkCodec("default factory for gzip codec", GzipCodec.class, codec);
-    
+    codec = factory.getCodecByName("gzip");
+    checkCodec("default factory for gzip codec", GzipCodec.class, codec);
+    codec = factory.getCodecByName("GZIP");
+    checkCodec("default factory for gzip codec", GzipCodec.class, codec);
+    codec = factory.getCodecByName("GZIPCodec");
+    checkCodec("default factory for gzip codec", GzipCodec.class, codec);
+    codec = factory.getCodecByName("gzipcodec");
+    checkCodec("default factory for gzip codec", GzipCodec.class, codec);
+    Class klass = factory.getCodecClassByName("gzipcodec");
+    assertEquals(GzipCodec.class, klass);
+
     codec = factory.getCodec(new Path("/tmp/foo.bz2"));
     checkCodec("default factory for .bz2", BZip2Codec.class, codec);
     codec = factory.getCodecByClassName(BZip2Codec.class.getCanonicalName());
     checkCodec("default factory for bzip2 codec", BZip2Codec.class, codec);
-    
+    codec = factory.getCodecByName("bzip2");
+    checkCodec("default factory for bzip2 codec", BZip2Codec.class, codec);
+    codec = factory.getCodecByName("bzip2codec");
+    checkCodec("default factory for bzip2 codec", BZip2Codec.class, codec);
+    codec = factory.getCodecByName("BZIP2");
+    checkCodec("default factory for bzip2 codec", BZip2Codec.class, codec);
+    codec = factory.getCodecByName("BZIP2CODEC");
+    checkCodec("default factory for bzip2 codec", BZip2Codec.class, codec);
+
+    codec = factory.getCodecByClassName(DeflateCodec.class.getCanonicalName());
+    checkCodec("default factory for deflate codec", DeflateCodec.class, codec);
+    codec = factory.getCodecByName("deflate");
+    checkCodec("default factory for deflate codec", DeflateCodec.class, codec);
+    codec = factory.getCodecByName("deflatecodec");
+    checkCodec("default factory for deflate codec", DeflateCodec.class, codec);
+    codec = factory.getCodecByName("DEFLATE");
+    checkCodec("default factory for deflate codec", DeflateCodec.class, codec);
+    codec = factory.getCodecByName("DEFLATECODEC");
+    checkCodec("default factory for deflate codec", DeflateCodec.class, codec);
+
     factory = setClasses(new Class[0]);
     codec = factory.getCodec(new Path("/tmp/foo.bar"));
     assertEquals("empty codec bar codec", null, codec);
@@ -164,20 +193,32 @@ public class TestCodecFactory extends Te
     assertEquals("full factory for .bz2", null, codec);
     codec = factory.getCodecByClassName(BZip2Codec.class.getCanonicalName());
     assertEquals("full codec bzip2 codec", null, codec);
-    
+
     codec = factory.getCodec(new Path("/tmp/foo.bar"));
     checkCodec("full factory bar codec", BarCodec.class, codec);
     codec = factory.getCodecByClassName(BarCodec.class.getCanonicalName());
     checkCodec("full factory bar codec", BarCodec.class, codec);
-    
+    codec = factory.getCodecByName("bar");
+    checkCodec("full factory bar codec", BarCodec.class, codec);
+    codec = factory.getCodecByName("BAR");
+    checkCodec("full factory bar codec", BarCodec.class, codec);
+
     codec = factory.getCodec(new Path("/tmp/foo/baz.foo.bar"));
     checkCodec("full factory foo bar codec", FooBarCodec.class, codec);
     codec = factory.getCodecByClassName(FooBarCodec.class.getCanonicalName());
     checkCodec("full factory foo bar codec", FooBarCodec.class, codec);
-    
+    codec = factory.getCodecByName("foobar");
+    checkCodec("full factory foo bar codec", FooBarCodec.class, codec);
+    codec = factory.getCodecByName("FOOBAR");
+    checkCodec("full factory foo bar codec", FooBarCodec.class, codec);
+
     codec = factory.getCodec(new Path("/tmp/foo.foo"));
     checkCodec("full factory foo codec", FooCodec.class, codec);
     codec = factory.getCodecByClassName(FooCodec.class.getCanonicalName());
     checkCodec("full factory foo codec", FooCodec.class, codec);
+    codec = factory.getCodecByName("foo");
+    checkCodec("full factory foo codec", FooCodec.class, codec);
+    codec = factory.getCodecByName("FOO");
+    checkCodec("full factory foo codec", FooCodec.class, codec);
   }
 }