You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by do...@apache.org on 2011/03/23 12:38:54 UTC

svn commit: r1084544 - in /lucene/dev/trunk/modules/benchmark: CHANGES.txt src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java

Author: doronc
Date: Wed Mar 23 11:38:54 2011
New Revision: 1084544

URL: http://svn.apache.org/viewvc?rev=1084544&view=rev
Log:
LUCENE-2980: Benchmark's ContentSource made insensitive to letter case of file suffix.

Added:
    lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java
Modified:
    lucene/dev/trunk/modules/benchmark/CHANGES.txt
    lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java

Modified: lucene/dev/trunk/modules/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/CHANGES.txt?rev=1084544&r1=1084543&r2=1084544&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/CHANGES.txt (original)
+++ lucene/dev/trunk/modules/benchmark/CHANGES.txt Wed Mar 23 11:38:54 2011
@@ -3,6 +3,12 @@ Lucene Benchmark Contrib Change Log
 The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
 
 03/22/2011
+  LUCENE-2980: Benchmark's ContentSource no longer requires lower case file suffixes 
+  for detecting file type (gzip/bzip2/text). As part of this fix, worked around an 
+  issue with gzip/bzip input streams which were remaining open (See COMPRESS-127).
+  (Doron Cohen) 
+
+03/22/2011
   LUCENE-2978: Upgrade benchmark's commons-compress from 1.0 to 1.1 as 
   the move of gzip decompression in LUCENE-1540 from Java's GZipInputStream
   to commons-compress 1.0 made it 15 times slower. In 1.1 no such slow-down

Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java?rev=1084544&r1=1084543&r2=1084544&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java Wed Mar 23 11:38:54 2011
@@ -25,6 +25,7 @@ import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.Locale;
 import java.util.Map;
 
 import org.apache.commons.compress.compressors.CompressorException;
@@ -128,23 +129,49 @@ public abstract class ContentSource {
     int idx = fileName.lastIndexOf('.');
     String type = null;
     if (idx != -1) {
-      type = extensionToType.get(fileName.substring(idx));
+      type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ENGLISH));
     }
     
-    try {
-      if (type!=null) { // bzip or gzip
-        return csFactory.createCompressorInputStream(type, is);
-      } 
-    } catch (CompressorException e) {
-      IOException ioe = new IOException(e.getMessage());
-      ioe.initCause(e);
-      throw ioe;
-    }
+    if (type!=null) { // bzip or gzip
+    	try {
+    		return closableCompressorInputStream(type,is);
+    	} catch (CompressorException e) {
+    		IOException ioe = new IOException(e.getMessage());
+    		ioe.initCause(e);
+    		throw ioe;
+    	}
+    } 
     
     return is;
   }
   
   /**
+   * Creates the compressor input stream for <code>type</code> over <code>is</code>; for gzip the result is wrapped so that
+   * calling close will also close the underlying stream - workaround for CommonsCompress bug (COMPRESS-127). 
+   */
+  private InputStream closableCompressorInputStream(String type, final InputStream is) throws CompressorException {
+    final InputStream delegee = csFactory.createCompressorInputStream(type, is); // the actual decompressing stream
+    if (!type.equals(CompressorStreamFactory.GZIP)) {
+    	return delegee; // compressor bug affects only gzip, so other types are returned unwrapped
+    }
+    return new InputStream() { // forwards every call to delegee; only close() adds behavior
+			@Override	public int read() throws IOException { return delegee.read();	}
+			@Override	public int read(byte[] b) throws IOException { return delegee.read(b);	}
+			@Override	public int available() throws IOException {	return delegee.available();	}
+			@Override	public synchronized void mark(int readlimit) { delegee.mark(readlimit);	}
+			@Override	public boolean markSupported() { return delegee.markSupported(); }
+			@Override	public int read(byte[] b, int off, int len) throws IOException { return delegee.read(b, off, len); }
+			@Override	public synchronized void reset() throws IOException {	delegee.reset(); }
+			@Override	public long skip(long n) throws IOException {	return delegee.skip(n);	}
+			@Override	
+			public void close() throws IOException { 
+				delegee.close(); // close the compressor stream first
+				is.close(); // then explicitly close the wrapped stream; if delegee.close() throws, this line is not reached
+			}
+    };
+	}
+
+	/**
    * Returns true whether it's time to log a message (depending on verbose and
    * the number of documents generated).
    */

Added: lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java?rev=1084544&view=auto
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java (added)
+++ lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java Wed Mar 23 11:38:54 2011
@@ -0,0 +1,127 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+
+import org.apache.commons.compress.compressors.CompressorStreamFactory;
+import org.apache.lucene.benchmark.BenchmarkTestCase;
+import org.apache.lucene.util._TestUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class ContentSourceTest extends BenchmarkTestCase { // checks ContentSource.getInputStream(File) on text, gzip and bzip2 files with lower and upper case suffixes
+  private static final String TEXT = "Some-Text..."; // the single line written to, and expected back from, every test file
+  private File testDir; // scratch directory, recreated in setUp() and removed in tearDown()
+  private CompressorStreamFactory csFactory = new CompressorStreamFactory(); // used here only to write the compressed fixtures
+  /** Plain text files are read back with a lower case and an upper case suffix. */
+  @Test
+  public void testGetInputStreamPlainText() throws Exception {
+    assertReadText(textFile("txt"));
+    assertReadText(textFile("TXT"));
+  }
+  /** Gzip files are read back for both gzip suffixes, in lower and upper case. */
+  @Test
+  public void testGetInputStreamGzip() throws Exception {
+    assertReadText(gzipFile("gz"));
+    assertReadText(gzipFile("gzip"));
+    assertReadText(gzipFile("GZ"));
+    assertReadText(gzipFile("GZIP"));
+  }
+  /** Bzip2 files are read back for both bzip suffixes, in lower and upper case. */
+  @Test
+  public void testGetInputStreamBzip2() throws Exception {
+  	assertReadText(bzip2File("bz2"));
+  	assertReadText(bzip2File("bzip"));
+  	assertReadText(bzip2File("BZ2"));
+  	assertReadText(bzip2File("BZIP"));
+  }
+  /** Writes TEXT plus a line separator, uncompressed, to testDir/testfile.&lt;ext&gt; and returns that file. */
+  private File textFile(String ext) throws Exception {
+    File f = new File(testDir,"testfile." +	ext);
+    BufferedWriter w = new BufferedWriter(new FileWriter(f));
+    w.write(TEXT);
+    w.newLine();
+    w.close();
+    return f;
+  }
+  /** Writes TEXT plus a line separator through a GZIP compressor stream to testDir/testfile.&lt;ext&gt; and returns that file. */
+  private File gzipFile(String ext) throws Exception {
+    File f = new File(testDir,"testfile." +	ext);
+    OutputStream os = csFactory.createCompressorOutputStream(CompressorStreamFactory.GZIP, new FileOutputStream(f));
+    BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
+    w.write(TEXT);
+    w.newLine();
+    w.close();
+    return f;
+  }
+  /** Writes TEXT plus a line separator through a BZIP2 compressor stream to testDir/testfile.&lt;ext&gt; and returns that file. */
+  private File bzip2File(String ext) throws Exception {
+  	File f = new File(testDir,"testfile." +	ext);
+  	OutputStream os = csFactory.createCompressorOutputStream(CompressorStreamFactory.BZIP2, new FileOutputStream(f));
+  	BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
+  	w.write(TEXT);
+  	w.newLine();
+  	w.close();
+  	return f;
+  }
+  /** Opens f through ContentSource.getInputStream and asserts that its first line equals TEXT. */
+  private void assertReadText(File f) throws Exception {
+    ContentSource src = new ContentSource() { // minimal concrete subclass; only getInputStream is exercised
+      @Override
+      public void close() throws IOException { 
+      }
+      @Override
+      public DocData getNextDocData(DocData docData) throws NoMoreDataException,
+      IOException { 
+        return null;
+      }
+    };
+    InputStream ir = src.getInputStream(f);
+    InputStreamReader in = new InputStreamReader(ir); // NOTE(review): no charset is given here or in the writers above
+    BufferedReader r = new BufferedReader(in);
+    String line = r.readLine();
+    assertEquals("Wrong text found in "+f.getName(), TEXT, line);
+    r.close(); // not reached if the assertion above fails
+  }
+  /** Recreates the scratch directory before each test. */
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    testDir = new File(getWorkDir(),"ContentSourceTest");
+    _TestUtil.rmDir(testDir); // remove anything already at that path
+    assertTrue(testDir.mkdirs()); // fail the test early if the directory cannot be created
+  }
+  /** Removes the scratch directory after each test. */
+  @After
+  public void tearDown() throws Exception {
+    _TestUtil.rmDir(testDir);
+    super.tearDown();
+  }
+
+}