You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by do...@apache.org on 2011/03/23 12:38:54 UTC
svn commit: r1084544 - in /lucene/dev/trunk/modules/benchmark: CHANGES.txt
src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java
Author: doronc
Date: Wed Mar 23 11:38:54 2011
New Revision: 1084544
URL: http://svn.apache.org/viewvc?rev=1084544&view=rev
Log:
LUCENE-2980: Benchmark's ContentSource made insensitive to letter case of file suffix.
Added:
lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java
Modified:
lucene/dev/trunk/modules/benchmark/CHANGES.txt
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
Modified: lucene/dev/trunk/modules/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/CHANGES.txt?rev=1084544&r1=1084543&r2=1084544&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/CHANGES.txt (original)
+++ lucene/dev/trunk/modules/benchmark/CHANGES.txt Wed Mar 23 11:38:54 2011
@@ -3,6 +3,12 @@ Lucene Benchmark Contrib Change Log
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
03/22/2011
+ LUCENE-2980: Benchmark's ContentSource no longer requires lower case file suffixes
+ for detecting file type (gzip/bzip2/text). As part of this fix, worked around an
+ issue with gzip/bzip input streams which were remaining open (See COMPRESS-127).
+ (Doron Cohen)
+
+03/22/2011
LUCENE-2978: Upgrade benchmark's commons-compress from 1.0 to 1.1 as
the move of gzip decompression in LUCENE-1540 from Java's GZipInputStream
to commons-compress 1.0 made it 15 times slower. In 1.1 no such slow-down
Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java?rev=1084544&r1=1084543&r2=1084544&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java Wed Mar 23 11:38:54 2011
@@ -25,6 +25,7 @@ import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.Locale;
import java.util.Map;
import org.apache.commons.compress.compressors.CompressorException;
@@ -128,23 +129,49 @@ public abstract class ContentSource {
int idx = fileName.lastIndexOf('.');
String type = null;
if (idx != -1) {
- type = extensionToType.get(fileName.substring(idx));
+ type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ENGLISH));
}
- try {
- if (type!=null) { // bzip or gzip
- return csFactory.createCompressorInputStream(type, is);
- }
- } catch (CompressorException e) {
- IOException ioe = new IOException(e.getMessage());
- ioe.initCause(e);
- throw ioe;
- }
+ if (type!=null) { // bzip or gzip
+ try {
+ return closableCompressorInputStream(type,is);
+ } catch (CompressorException e) {
+ IOException ioe = new IOException(e.getMessage());
+ ioe.initCause(e);
+ throw ioe;
+ }
+ }
return is;
}
/**
+ * Wraps the decompressing stream created for <code>type</code> so that closing it also closes the
+ * underlying stream <code>is</code>, even if the decompressor's own close() throws (workaround for COMPRESS-127).
+ */
+ private InputStream closableCompressorInputStream(String type, final InputStream is) throws CompressorException {
+ final InputStream delegee = csFactory.createCompressorInputStream(type, is);
+ if (!type.equals(CompressorStreamFactory.GZIP)) {
+ return delegee; //compressor bug affects only gzip
+ }
+ return new InputStream() {
+ @Override public int read() throws IOException { return delegee.read(); }
+ @Override public int read(byte[] b) throws IOException { return delegee.read(b); }
+ @Override public int available() throws IOException { return delegee.available(); }
+ @Override public synchronized void mark(int readlimit) { delegee.mark(readlimit); }
+ @Override public boolean markSupported() { return delegee.markSupported(); }
+ @Override public int read(byte[] b, int off, int len) throws IOException { return delegee.read(b, off, len); }
+ @Override public synchronized void reset() throws IOException { delegee.reset(); }
+ @Override public long skip(long n) throws IOException { return delegee.skip(n); }
+ @Override
+ public void close() throws IOException {
+ try { delegee.close(); } // the finally below still runs when this throws
+ finally { is.close(); }
+ }
+ };
+ }
+
+ /**
* Returns true whether it's time to log a message (depending on verbose and
* the number of documents generated).
*/
Added: lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java?rev=1084544&view=auto
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java (added)
+++ lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java Wed Mar 23 11:38:54 2011
@@ -0,0 +1,127 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+
+import org.apache.commons.compress.compressors.CompressorStreamFactory;
+import org.apache.lucene.benchmark.BenchmarkTestCase;
+import org.apache.lucene.util._TestUtil;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/** Checks that ContentSource.getInputStream(File) reads plain, gzip and bzip2 files whatever the letter case of the suffix. */
+public class ContentSourceTest extends BenchmarkTestCase {
+ private static final String TEXT = "Some-Text...";
+ private File testDir;
+ private CompressorStreamFactory csFactory = new CompressorStreamFactory();
+
+ @Test
+ public void testGetInputStreamPlainText() throws Exception {
+ assertReadText(textFile("txt"));
+ assertReadText(textFile("TXT"));
+ }
+
+ @Test
+ public void testGetInputStreamGzip() throws Exception {
+ assertReadText(gzipFile("gz"));
+ assertReadText(gzipFile("gzip"));
+ assertReadText(gzipFile("GZ"));
+ assertReadText(gzipFile("GZIP"));
+ }
+
+ @Test
+ public void testGetInputStreamBzip2() throws Exception {
+ assertReadText(bzip2File("bz2"));
+ assertReadText(bzip2File("bzip"));
+ assertReadText(bzip2File("BZ2"));
+ assertReadText(bzip2File("BZIP"));
+ }
+
+ /** Creates "testfile." + ext in testDir, containing TEXT and a line separator as plain text. */
+ private File textFile(String ext) throws Exception {
+ File f = new File(testDir,"testfile." + ext);
+ writeText(new FileOutputStream(f));
+ return f;
+ }
+
+ /** Same as textFile, but the content is gzip-compressed. */
+ private File gzipFile(String ext) throws Exception { return compressedFile(ext, CompressorStreamFactory.GZIP); }
+
+ /** Same as textFile, but the content is bzip2-compressed. */
+ private File bzip2File(String ext) throws Exception { return compressedFile(ext, CompressorStreamFactory.BZIP2); }
+
+ /** Creates "testfile." + ext in testDir, containing TEXT compressed with the given CompressorStreamFactory type. */
+ private File compressedFile(String ext, String type) throws Exception {
+ File f = new File(testDir,"testfile." + ext);
+ writeText(csFactory.createCompressorOutputStream(type, new FileOutputStream(f)));
+ return f;
+ }
+
+ /** Writes TEXT followed by a line separator to os; the stream is closed even if writing fails. */
+ private void writeText(OutputStream os) throws IOException {
+ BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
+ try {
+ w.write(TEXT);
+ w.newLine();
+ } finally {
+ w.close();
+ }
+ }
+
+ /** Opens f via ContentSource.getInputStream and asserts its first line equals TEXT; the reader is always closed. */
+ private void assertReadText(File f) throws Exception {
+ ContentSource src = new ContentSource() { // stub: only getInputStream is exercised
+ @Override public void close() throws IOException {}
+ @Override public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException { return null; }
+ };
+ BufferedReader r = new BufferedReader(new InputStreamReader(src.getInputStream(f)));
+ try {
+ assertEquals("Wrong text found in "+f.getName(), TEXT, r.readLine());
+ } finally {
+ r.close(); // close on assertion failure too, so no open handle remains when tearDown deletes testDir
+ }
+ }
+
+ /** Recreates the test directory under the benchmark work dir before each test. */
+ @Before
+ public void setUp() throws Exception {
+ super.setUp();
+ testDir = new File(getWorkDir(),"ContentSourceTest");
+ _TestUtil.rmDir(testDir);
+ assertTrue(testDir.mkdirs());
+ }
+
+ /** Deletes the test directory created by setUp. */
+ @After
+ public void tearDown() throws Exception {
+ _TestUtil.rmDir(testDir);
+ super.tearDown();
+ }
+
+}