You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by do...@apache.org on 2011/03/24 13:58:10 UTC
svn commit: r1084941 - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/contrib/benchmark/
lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/
lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/
lucen...
Author: doronc
Date: Thu Mar 24 12:58:09 2011
New Revision: 1084941
URL: http://svn.apache.org/viewvc?rev=1084941&view=rev
Log:
LUCENE-2977: WriteLineDocTask should write gzip/bzip2/txt according to the extension of the specified output file name - merge from trunk.
Added:
lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java
- copied unchanged from r1084929, lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java
lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java
- copied unchanged from r1084929, lucene/dev/trunk/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java
Removed:
lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/ContentSourceTest.java
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/contrib/benchmark/CHANGES.txt
lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java
lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java
lucene/dev/branches/branch_3x/solr/ (props changed)
Modified: lucene/dev/branches/branch_3x/lucene/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/benchmark/CHANGES.txt?rev=1084941&r1=1084940&r2=1084941&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/benchmark/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/benchmark/CHANGES.txt Thu Mar 24 12:58:09 2011
@@ -2,6 +2,11 @@ Lucene Benchmark Contrib Change Log
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
+03/24/2011
+ LUCENE-2977: WriteLineDocTask now automatically detects how to write -
+ GZip or BZip2 or Plain-text - according to the output file extension.
+ Property bzip.compression of WriteLineDocTask was removed. (Doron Cohen)
+
03/23/2011
LUCENE-2980: Benchmark's ContentSource no more requires lower case file suffixes
for detecting file type (gzip/bzip2/text). As part of this fix worked around an
Modified: lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java?rev=1084941&r1=1084940&r2=1084941&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java Thu Mar 24 12:58:09 2011
@@ -17,19 +17,11 @@ package org.apache.lucene.benchmark.byTa
* limitations under the License.
*/
-import java.io.BufferedInputStream;
import java.io.File;
-import java.io.FileInputStream;
import java.io.IOException;
-import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Locale;
-import java.util.Map;
-import org.apache.commons.compress.compressors.CompressorException;
-import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.lucene.benchmark.byTask.utils.Config;
/**
@@ -56,17 +48,6 @@ import org.apache.lucene.benchmark.byTas
*/
public abstract class ContentSource {
- private static final Map<String,String> extensionToType = new HashMap<String,String>();
- static {
- // these in are lower case, we will lower case at the test as well
- extensionToType.put(".bz2", CompressorStreamFactory.BZIP2);
- extensionToType.put(".bzip", CompressorStreamFactory.BZIP2);
- extensionToType.put(".gz", CompressorStreamFactory.GZIP);
- extensionToType.put(".gzip", CompressorStreamFactory.GZIP);
- }
-
- protected static final int BUFFER_SIZE = 1 << 16; // 64K
-
private long bytesCount;
private long totalBytesCount;
private int docsCount;
@@ -78,8 +59,6 @@ public abstract class ContentSource {
protected boolean verbose;
protected String encoding;
- private CompressorStreamFactory csFactory = new CompressorStreamFactory();
-
/** update count of bytes generated by this source */
protected final synchronized void addBytes(long numBytes) {
bytesCount += numBytes;
@@ -114,63 +93,6 @@ public abstract class ContentSource {
}
}
- /**
- * Returns an {@link InputStream} over the requested file. This method
- * attempts to identify the appropriate {@link InputStream} instance to return
- * based on the file name (e.g., if it ends with .bz2 or .bzip, return a
- * 'bzip' {@link InputStream}).
- */
- protected InputStream getInputStream(File file) throws IOException {
- // First, create a FileInputStream, as this will be required by all types.
- // Wrap with BufferedInputStream for better performance
- InputStream is = new BufferedInputStream(new FileInputStream(file), BUFFER_SIZE);
-
- String fileName = file.getName();
- int idx = fileName.lastIndexOf('.');
- String type = null;
- if (idx != -1) {
- type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ENGLISH));
- }
-
- if (type!=null) { // bzip or gzip
- try {
- return closableCompressorInputStream(type,is);
- } catch (CompressorException e) {
- IOException ioe = new IOException(e.getMessage());
- ioe.initCause(e);
- throw ioe;
- }
- }
-
- return is;
- }
-
- /**
- * Wrap the compressor input stream so that calling close will also close
- * the underlying stream - workaround for CommonsCompress bug (COMPRESS-127).
- */
- private InputStream closableCompressorInputStream(String type, final InputStream is) throws CompressorException {
- final InputStream delegee = csFactory.createCompressorInputStream(type, is);
- if (!type.equals(CompressorStreamFactory.GZIP)) {
- return delegee; //compressor bug affects only gzip
- }
- return new InputStream() {
- @Override public int read() throws IOException { return delegee.read(); }
- @Override public int read(byte[] b) throws IOException { return delegee.read(b); }
- @Override public int available() throws IOException { return delegee.available(); }
- @Override public synchronized void mark(int readlimit) { delegee.mark(readlimit); }
- @Override public boolean markSupported() { return delegee.markSupported(); }
- @Override public int read(byte[] b, int off, int len) throws IOException { return delegee.read(b, off, len); }
- @Override public synchronized void reset() throws IOException { delegee.reset(); }
- @Override public long skip(long n) throws IOException { return delegee.skip(n); }
- @Override
- public void close() throws IOException {
- delegee.close();
- is.close();
- }
- };
- }
-
/**
* Returns true whether it's time to log a message (depending on verbose and
* the number of documents generated).
Modified: lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java?rev=1084941&r1=1084940&r2=1084941&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java Thu Mar 24 12:58:09 2011
@@ -24,6 +24,7 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
import org.apache.lucene.util.ThreadInterruptedException;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
@@ -189,7 +190,7 @@ public class EnwikiContentSource extends
return;
} else if (localFileIS == is) {
// If file is not already re-opened then re-open it now
- is = getInputStream(file);
+ is = StreamUtils.inputStream(file);
}
}
}
@@ -290,7 +291,7 @@ public class EnwikiContentSource extends
@Override
public void resetInputs() throws IOException {
super.resetInputs();
- is = getInputStream(file);
+ is = StreamUtils.inputStream(file);
}
@Override
Modified: lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java?rev=1084941&r1=1084940&r2=1084941&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java Thu Mar 24 12:58:09 2011
@@ -28,6 +28,7 @@ import java.util.Properties;
import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
/**
* A {@link ContentSource} reading one line at a time as a
@@ -178,8 +179,8 @@ public class LineDocSource extends Conte
if (reader != null) {
reader.close();
}
- InputStream is = getInputStream(file);
- reader = new BufferedReader(new InputStreamReader(is, encoding), BUFFER_SIZE);
+ InputStream is = StreamUtils.inputStream(file);
+ reader = new BufferedReader(new InputStreamReader(is, encoding), StreamUtils.BUFFER_SIZE);
if (skipHeaderLine) {
reader.readLine(); // skip one line - the header line - already handled that info
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java?rev=1084941&r1=1084940&r2=1084941&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java Thu Mar 24 12:58:09 2011
@@ -32,6 +32,7 @@ import java.util.Locale;
import org.apache.lucene.benchmark.byTask.feeds.TrecDocParser.ParsePathType;
import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
import org.apache.lucene.benchmark.byTask.utils.StringBuilderReader;
import org.apache.lucene.util.ThreadInterruptedException;
@@ -194,8 +195,8 @@ public class TrecContentSource extends C
System.out.println("opening: " + f + " length: " + f.length());
}
try {
- InputStream inputStream = getInputStream(f); // support either gzip, bzip2, or regular text file, by extension
- reader = new BufferedReader(new InputStreamReader(inputStream, encoding), BUFFER_SIZE);
+ InputStream inputStream = StreamUtils.inputStream(f); // support either gzip, bzip2, or regular text file, by extension
+ reader = new BufferedReader(new InputStreamReader(inputStream, encoding), StreamUtils.BUFFER_SIZE);
currPathType = TrecDocParser.pathType(f);
return;
} catch (Exception e) {
Modified: lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java?rev=1084941&r1=1084940&r2=1084941&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java Thu Mar 24 12:58:09 2011
@@ -17,9 +17,8 @@ package org.apache.lucene.benchmark.byTa
* limitations under the License.
*/
-import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
-import java.io.FileOutputStream;
+import java.io.File;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
@@ -28,10 +27,10 @@ import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -40,14 +39,17 @@ import org.apache.lucene.document.Field;
* following format: title <TAB> date <TAB> body. The output of this
* task can be consumed by
* {@link org.apache.lucene.benchmark.byTask.feeds.LineDocSource} and is intended
- * to save the IO overhead of opening a file per document to be indexed.<br>
+ * to save the IO overhead of opening a file per document to be indexed.
+ * <p>
+ * The format of the output is set according to the output file extension.
+ * Compression is recommended when the output file is expected to be large.
+ * See info on file extensions in {@link StreamUtils.Type}.
+ * <p>
* Supports the following parameters:
* <ul>
- * <li><b>line.file.out<b> - the name of the file to write the output to. That
+ * <li><b>line.file.out</b> - the name of the file to write the output to. That
* parameter is mandatory. <b>NOTE:</b> the file is re-created.
- * <li><b>bzip.compression<b> - whether the output should be bzip-compressed. This is
- * recommended when the output file is expected to be large.
- * <li><b>line.fields<b> - which fields should be written in each line.
+ * <li><b>line.fields</b> - which fields should be written in each line.
* (optional, default: {@link #DEFAULT_FIELDS}).
* <li><b>sufficient.fields</b> - list of field names, separated by comma, which,
* if all of them are missing, the document will be skipped. For example, to require
@@ -91,30 +93,12 @@ public class WriteLineDocTask extends Pe
public WriteLineDocTask(PerfRunData runData) throws Exception {
super(runData);
Config config = runData.getConfig();
- String fileName = config.get("line.file.out", null);
- if (fileName == null) {
+ String fname = config.get("line.file.out", null);
+ if (fname == null) {
throw new IllegalArgumentException("line.file.out must be set");
}
-
- OutputStream out = new FileOutputStream(fileName);
- boolean doBzipCompression = false;
- String doBZCompress = config.get("bzip.compression", null);
- if (doBZCompress != null) {
- // Property was set, use the value.
- doBzipCompression = Boolean.valueOf(doBZCompress).booleanValue();
- } else {
- // Property was not set, attempt to detect based on file's extension
- doBzipCompression = fileName.endsWith("bz2");
- }
-
- if (doBzipCompression) {
- // Wrap with BOS since BZip2CompressorOutputStream calls out.write(int)
- // and does not use the write(byte[]) version. This proved to speed the
- // compression process by 70% !
- out = new BufferedOutputStream(out, 1 << 16);
- out = new CompressorStreamFactory().createCompressorOutputStream("bzip2", out);
- }
- lineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), 1 << 16));
+ OutputStream out = StreamUtils.outputStream(new File(fname));
+ lineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), StreamUtils.BUFFER_SIZE));
docMaker = runData.getDocMaker();
// init fields
Modified: lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java?rev=1084941&r1=1084940&r2=1084941&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java Thu Mar 24 12:58:09 2011
@@ -103,23 +103,19 @@ public class LineDocSourceTest extends B
writer.close();
}
- private void doIndexAndSearchTest(File file, boolean setBZCompress,
- String bz2CompressVal, Class<? extends LineParser> lineParserClass, String storedField) throws Exception {
- doIndexAndSearchTestWithRepeats(file, setBZCompress, bz2CompressVal, lineParserClass, 1, storedField); // no extra repetitions
- doIndexAndSearchTestWithRepeats(file, setBZCompress, bz2CompressVal, lineParserClass, 2, storedField); // 1 extra repetition
- doIndexAndSearchTestWithRepeats(file, setBZCompress, bz2CompressVal, lineParserClass, 4, storedField); // 3 extra repetitions
+ private void doIndexAndSearchTest(File file, Class<? extends LineParser> lineParserClass, String storedField) throws Exception {
+ doIndexAndSearchTestWithRepeats(file, lineParserClass, 1, storedField); // no extra repetitions
+ doIndexAndSearchTestWithRepeats(file, lineParserClass, 2, storedField); // 1 extra repetition
+ doIndexAndSearchTestWithRepeats(file, lineParserClass, 4, storedField); // 3 extra repetitions
}
- private void doIndexAndSearchTestWithRepeats(File file, boolean setBZCompress,
- String bz2CompressVal, Class<? extends LineParser> lineParserClass, int numAdds, String storedField) throws Exception {
+ private void doIndexAndSearchTestWithRepeats(File file,
+ Class<? extends LineParser> lineParserClass, int numAdds, String storedField) throws Exception {
Properties props = new Properties();
// LineDocSource specific settings.
props.setProperty("docs.file", file.getAbsolutePath());
- if (setBZCompress) {
- props.setProperty("bzip.compression", bz2CompressVal);
- }
if (lineParserClass != null) {
props.setProperty("line.parser", lineParserClass.getName());
}
@@ -160,37 +156,31 @@ public class LineDocSourceTest extends B
public void testBZip2() throws Exception {
File file = new File(getWorkDir(), "one-line.bz2");
createBZ2LineFile(file,true);
- doIndexAndSearchTest(file, true, "true", null, null);
+ doIndexAndSearchTest(file, null, null);
}
public void testBZip2NoHeaderLine() throws Exception {
File file = new File(getWorkDir(), "one-line.bz2");
createBZ2LineFile(file,false);
- doIndexAndSearchTest(file, true, "true", null, null);
- }
-
- public void testBZip2AutoDetect() throws Exception {
- File file = new File(getWorkDir(), "one-line.bz2");
- createBZ2LineFile(file,false);
- doIndexAndSearchTest(file, false, null, null, null);
+ doIndexAndSearchTest(file, null, null);
}
public void testRegularFile() throws Exception {
File file = new File(getWorkDir(), "one-line");
createRegularLineFile(file,true);
- doIndexAndSearchTest(file, false, null, null, null);
+ doIndexAndSearchTest(file, null, null);
}
public void testRegularFileSpecialHeader() throws Exception {
File file = new File(getWorkDir(), "one-line");
createRegularLineFile(file,true);
- doIndexAndSearchTest(file, false, null, HeaderLineParser.class, null);
+ doIndexAndSearchTest(file, HeaderLineParser.class, null);
}
public void testRegularFileNoHeaderLine() throws Exception {
File file = new File(getWorkDir(), "one-line");
createRegularLineFile(file,false);
- doIndexAndSearchTest(file, false, null, null, null);
+ doIndexAndSearchTest(file, null, null);
}
public void testInvalidFormat() throws Exception {
@@ -210,7 +200,7 @@ public class LineDocSourceTest extends B
writer.newLine();
writer.close();
try {
- doIndexAndSearchTest(file, false, null, null, null);
+ doIndexAndSearchTest(file, null, null);
fail("Some exception should have been thrown for: [" + testCases[i] + "]");
} catch (Exception e) {
// expected.
@@ -222,7 +212,7 @@ public class LineDocSourceTest extends B
public void testWithDocsName() throws Exception {
File file = new File(getWorkDir(), "one-line");
createRegularLineFileWithMoreFields(file, DocMaker.NAME_FIELD);
- doIndexAndSearchTest(file, false, null, null, DocMaker.NAME_FIELD);
+ doIndexAndSearchTest(file, null, DocMaker.NAME_FIELD);
}
/** Use fields names that are not defined in Docmaker and so will go to Properties */
@@ -230,7 +220,7 @@ public class LineDocSourceTest extends B
File file = new File(getWorkDir(), "one-line");
String specialField = "mySpecialField";
createRegularLineFileWithMoreFields(file, specialField);
- doIndexAndSearchTest(file, false, null, null, specialField);
+ doIndexAndSearchTest(file, null, specialField);
}
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java?rev=1084941&r1=1084940&r2=1084941&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java Thu Mar 24 12:58:09 2011
@@ -31,6 +31,7 @@ import org.apache.lucene.benchmark.Bench
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.StreamUtils.Type;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
@@ -135,16 +136,12 @@ public class WriteLineDocTaskTest extend
private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();
- private PerfRunData createPerfRunData(File file, boolean setBZCompress,
+ private PerfRunData createPerfRunData(File file,
boolean allowEmptyDocs,
- String bz2CompressVal,
String docMakerName) throws Exception {
Properties props = new Properties();
props.setProperty("doc.maker", docMakerName);
props.setProperty("line.file.out", file.getAbsolutePath());
- if (setBZCompress) {
- props.setProperty("bzip.compression", bz2CompressVal);
- }
props.setProperty("directory", "RAMDirectory"); // no accidental FS dir.
if (allowEmptyDocs) {
props.setProperty("sufficient.fields", ",");
@@ -157,11 +154,19 @@ public class WriteLineDocTaskTest extend
return new PerfRunData(config);
}
- private void doReadTest(File file, boolean bz2File, String expTitle,
+ private void doReadTest(File file, Type fileType, String expTitle,
String expDate, String expBody) throws Exception {
InputStream in = new FileInputStream(file);
- if (bz2File) {
- in = csFactory.createCompressorInputStream("bzip2", in);
+ switch(fileType) {
+ case BZIP2:
+ in = csFactory.createCompressorInputStream(CompressorStreamFactory.BZIP2, in);
+ break;
+ case GZIP:
+ in = csFactory.createCompressorInputStream(CompressorStreamFactory.GZIP, in);
+ case PLAIN:
+ break; // nothing to do
+ default:
+ assertFalse("Unknown file type!",true); //fail, should not happen
}
BufferedReader br = new BufferedReader(new InputStreamReader(in, "utf-8"));
try {
@@ -192,36 +197,37 @@ public class WriteLineDocTaskTest extend
// Create a document in bz2 format.
File file = new File(getWorkDir(), "one-line.bz2");
- PerfRunData runData = createPerfRunData(file, true, false, "true", WriteLineDocMaker.class.getName());
+ PerfRunData runData = createPerfRunData(file, false, WriteLineDocMaker.class.getName());
WriteLineDocTask wldt = new WriteLineDocTask(runData);
wldt.doLogic();
wldt.close();
- doReadTest(file, true, "title", "date", "body");
+ doReadTest(file, Type.BZIP2, "title", "date", "body");
}
- public void testBZip2AutoDetect() throws Exception {
+ /* Tests WriteLineDocTask with a gzip format. */
+ public void testGZip() throws Exception {
- // Create a document in bz2 format.
- File file = new File(getWorkDir(), "one-line.bz2");
- PerfRunData runData = createPerfRunData(file, false, false, null, WriteLineDocMaker.class.getName());
+ // Create a document in gz format.
+ File file = new File(getWorkDir(), "one-line.gz");
+ PerfRunData runData = createPerfRunData(file, false, WriteLineDocMaker.class.getName());
WriteLineDocTask wldt = new WriteLineDocTask(runData);
wldt.doLogic();
wldt.close();
- doReadTest(file, true, "title", "date", "body");
+ doReadTest(file, Type.GZIP, "title", "date", "body");
}
public void testRegularFile() throws Exception {
// Create a document in regular format.
File file = new File(getWorkDir(), "one-line");
- PerfRunData runData = createPerfRunData(file, true, false, "false", WriteLineDocMaker.class.getName());
+ PerfRunData runData = createPerfRunData(file, false, WriteLineDocMaker.class.getName());
WriteLineDocTask wldt = new WriteLineDocTask(runData);
wldt.doLogic();
wldt.close();
- doReadTest(file, false, "title", "date", "body");
+ doReadTest(file, Type.PLAIN, "title", "date", "body");
}
public void testCharsReplace() throws Exception {
@@ -229,12 +235,12 @@ public class WriteLineDocTaskTest extend
// separator char. However, it didn't replace newline characters, which
// resulted in errors in LineDocSource.
File file = new File(getWorkDir(), "one-line");
- PerfRunData runData = createPerfRunData(file, false, false, null, NewLinesDocMaker.class.getName());
+ PerfRunData runData = createPerfRunData(file, false, NewLinesDocMaker.class.getName());
WriteLineDocTask wldt = new WriteLineDocTask(runData);
wldt.doLogic();
wldt.close();
- doReadTest(file, false, "title text", "date text", "body text two");
+ doReadTest(file, Type.PLAIN, "title text", "date text", "body text two");
}
public void testEmptyBody() throws Exception {
@@ -242,28 +248,28 @@ public class WriteLineDocTaskTest extend
// had a TITLE element (LUCENE-1755). It should throw away documents if they
// don't have BODY nor TITLE
File file = new File(getWorkDir(), "one-line");
- PerfRunData runData = createPerfRunData(file, false, false, null, NoBodyDocMaker.class.getName());
+ PerfRunData runData = createPerfRunData(file, false, NoBodyDocMaker.class.getName());
WriteLineDocTask wldt = new WriteLineDocTask(runData);
wldt.doLogic();
wldt.close();
- doReadTest(file, false, "title", "date", null);
+ doReadTest(file, Type.PLAIN, "title", "date", null);
}
public void testEmptyTitle() throws Exception {
File file = new File(getWorkDir(), "one-line");
- PerfRunData runData = createPerfRunData(file, false, false, null, NoTitleDocMaker.class.getName());
+ PerfRunData runData = createPerfRunData(file, false, NoTitleDocMaker.class.getName());
WriteLineDocTask wldt = new WriteLineDocTask(runData);
wldt.doLogic();
wldt.close();
- doReadTest(file, false, "", "date", "body");
+ doReadTest(file, Type.PLAIN, "", "date", "body");
}
/** Fail by default when there's only date */
public void testJustDate() throws Exception {
File file = new File(getWorkDir(), "one-line");
- PerfRunData runData = createPerfRunData(file, false, false, null, JustDateDocMaker.class.getName());
+ PerfRunData runData = createPerfRunData(file, false, JustDateDocMaker.class.getName());
WriteLineDocTask wldt = new WriteLineDocTask(runData);
wldt.doLogic();
wldt.close();
@@ -281,7 +287,7 @@ public class WriteLineDocTaskTest extend
public void testLegalJustDate() throws Exception {
File file = new File(getWorkDir(), "one-line");
- PerfRunData runData = createPerfRunData(file, false, false, null, LegalJustDateDocMaker.class.getName());
+ PerfRunData runData = createPerfRunData(file, false, LegalJustDateDocMaker.class.getName());
WriteLineDocTask wldt = new WriteLineDocTask(runData);
wldt.doLogic();
wldt.close();
@@ -299,7 +305,7 @@ public class WriteLineDocTaskTest extend
public void testEmptyDoc() throws Exception {
File file = new File(getWorkDir(), "one-line");
- PerfRunData runData = createPerfRunData(file, false, true, null, EmptyDocMaker.class.getName());
+ PerfRunData runData = createPerfRunData(file, true, EmptyDocMaker.class.getName());
WriteLineDocTask wldt = new WriteLineDocTask(runData);
wldt.doLogic();
wldt.close();
@@ -317,7 +323,7 @@ public class WriteLineDocTaskTest extend
public void testMultiThreaded() throws Exception {
File file = new File(getWorkDir(), "one-line");
- PerfRunData runData = createPerfRunData(file, false, false, null, ThreadingDocMaker.class.getName());
+ PerfRunData runData = createPerfRunData(file, false, ThreadingDocMaker.class.getName());
final WriteLineDocTask wldt = new WriteLineDocTask(runData);
Thread[] threads = new Thread[10];
for (int i = 0; i < threads.length; i++) {