You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by do...@apache.org on 2011/03/22 16:08:30 UTC

svn commit: r1084210 - in /lucene/dev/trunk: dev-tools/eclipse/ modules/benchmark/ modules/benchmark/lib/ modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/

Author: doronc
Date: Tue Mar 22 15:08:29 2011
New Revision: 1084210

URL: http://svn.apache.org/viewvc?rev=1084210&view=rev
Log:
LUCENE-2978: Upgrade benchmark's commons-compress from 1.0 to 1.1.

Added:
    lucene/dev/trunk/modules/benchmark/lib/commons-compress-1.1.jar   (with props)
Removed:
    lucene/dev/trunk/modules/benchmark/lib/commons-compress-1.0.jar
Modified:
    lucene/dev/trunk/dev-tools/eclipse/dot.classpath
    lucene/dev/trunk/modules/benchmark/CHANGES.txt
    lucene/dev/trunk/modules/benchmark/lib/commons-compress-NOTICE.txt
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java

Modified: lucene/dev/trunk/dev-tools/eclipse/dot.classpath
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/dev-tools/eclipse/dot.classpath?rev=1084210&r1=1084209&r2=1084210&view=diff
==============================================================================
--- lucene/dev/trunk/dev-tools/eclipse/dot.classpath (original)
+++ lucene/dev/trunk/dev-tools/eclipse/dot.classpath Tue Mar 22 15:08:29 2011
@@ -90,7 +90,7 @@
 	<classpathentry kind="lib" path="modules/analysis/phonetic/lib/commons-codec-1.4.jar"/>
 	<classpathentry kind="lib" path="modules/benchmark/lib/commons-beanutils-1.7.0.jar"/>
 	<classpathentry kind="lib" path="modules/benchmark/lib/commons-collections-3.1.jar"/>
-	<classpathentry kind="lib" path="modules/benchmark/lib/commons-compress-1.0.jar"/>
+	<classpathentry kind="lib" path="modules/benchmark/lib/commons-compress-1.1.jar"/>
 	<classpathentry kind="lib" path="modules/benchmark/lib/commons-digester-1.7.jar"/>
 	<classpathentry kind="lib" path="modules/benchmark/lib/commons-logging-1.0.4.jar"/>
 	<classpathentry kind="lib" path="modules/benchmark/lib/xercesImpl-2.9.1-patched-XERCESJ-1257.jar"/>

Modified: lucene/dev/trunk/modules/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/CHANGES.txt?rev=1084210&r1=1084209&r2=1084210&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/CHANGES.txt (original)
+++ lucene/dev/trunk/modules/benchmark/CHANGES.txt Tue Mar 22 15:08:29 2011
@@ -2,6 +2,12 @@ Lucene Benchmark Contrib Change Log
 
 The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
 
+03/22/2011
+  LUCENE-2978: Upgrade benchmark's commons-compress from 1.0 to 1.1 as 
+  the move of gzip decompression in LUCENE-1540 from Java's GZIPInputStream
+  to commons-compress 1.0 made it 15 times slower. In 1.1 no such slow-down
+  is observed. (Doron Cohen)   
+  
 03/21/2011
   LUCENE-2958: WriteLineDocTask improvements - allow to emit line docs also for empty
   docs, and be flexible about which fields are added to the line file. For this, a header

Added: lucene/dev/trunk/modules/benchmark/lib/commons-compress-1.1.jar
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/lib/commons-compress-1.1.jar?rev=1084210&view=auto
==============================================================================
Binary file - no diff available.

Modified: lucene/dev/trunk/modules/benchmark/lib/commons-compress-NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/lib/commons-compress-NOTICE.txt?rev=1084210&r1=1084209&r2=1084210&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/lib/commons-compress-NOTICE.txt (original)
+++ lucene/dev/trunk/modules/benchmark/lib/commons-compress-NOTICE.txt Tue Mar 22 15:08:29 2011
@@ -1,15 +1,5 @@
 Apache Commons Compress
-Copyright 2002-2009 The Apache Software Foundation
+Copyright 2002-2010 The Apache Software Foundation
 
 This product includes software developed by
 The Apache Software Foundation (http://www.apache.org/).
-
-Original BZip2 classes contributed by Keiron Liddle
-<ke...@aftexsw.com>, Aftex Software to the Apache Ant project
-
-Original Tar classes from contributors of the Apache Ant project
-
-Original Zip classes from contributors of the Apache Ant project
-
-Original CPIO classes contributed by Markus Kuss and the jRPM project
-(jrpm.sourceforge.net)

Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java?rev=1084210&r1=1084209&r2=1084210&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java Tue Mar 22 15:08:29 2011
@@ -55,15 +55,13 @@ import org.apache.lucene.benchmark.byTas
  */
 public abstract class ContentSource {
   
-  private static final int BZIP = 0;
-  private static final int GZIP = 1;
-  private static final int OTHER = 2;
-  private static final Map<String,Integer> extensionToType = new HashMap<String,Integer>();
+  private static final Map<String,String> extensionToType = new HashMap<String,String>();
   static {
-    extensionToType.put(".bz2", Integer.valueOf(BZIP));
-    extensionToType.put(".bzip", Integer.valueOf(BZIP));
-    extensionToType.put(".gz", Integer.valueOf(GZIP));
-    extensionToType.put(".gzip", Integer.valueOf(GZIP));
+  	// these are in lower case, we will lower case at the test as well
+    extensionToType.put(".bz2", CompressorStreamFactory.BZIP2);
+    extensionToType.put(".bzip", CompressorStreamFactory.BZIP2);
+    extensionToType.put(".gz", CompressorStreamFactory.GZIP);
+    extensionToType.put(".gzip", CompressorStreamFactory.GZIP);
   }
   
   protected static final int BUFFER_SIZE = 1 << 16; // 64K
@@ -128,28 +126,15 @@ public abstract class ContentSource {
     
     String fileName = file.getName();
     int idx = fileName.lastIndexOf('.');
-    int type = OTHER;
+    String type = null;
     if (idx != -1) {
-      Integer typeInt = extensionToType.get(fileName.substring(idx));
-      if (typeInt != null) {
-        type = typeInt.intValue();
-      }
+      type = extensionToType.get(fileName.substring(idx));
     }
     
     try {
-      switch (type) {
-        case BZIP:
-          // According to BZip2CompressorInputStream's code, it reads the first 
-          // two file header chars ('B' and 'Z'). It is important to wrap the
-          // underlying input stream with a buffered one since
-          // Bzip2CompressorInputStream uses the read() method exclusively.
-          is = csFactory.createCompressorInputStream("bzip2", is);
-          break;
-        case GZIP:
-          is = csFactory.createCompressorInputStream("gz", is);
-          break;
-        default: // Do nothing, stay with FileInputStream
-      }
+      if (type!=null) { // bzip or gzip
+        return csFactory.createCompressorInputStream(type, is);
+      } 
     } catch (CompressorException e) {
       IOException ioe = new IOException(e.getMessage());
       ioe.initCause(e);