You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/07/27 12:15:04 UTC
svn commit: r798096 - in /lucene/java/trunk/contrib/benchmark: ./
src/java/org/apache/lucene/benchmark/byTask/feeds/
src/java/org/apache/lucene/benchmark/byTask/tasks/
src/test/org/apache/lucene/benchmark/byTask/
src/test/org/apache/lucene/benchmark/by...
Author: mikemccand
Date: Mon Jul 27 10:15:03 2009
New Revision: 798096
URL: http://svn.apache.org/viewvc?rev=798096&view=rev
Log:
LUCENE-1595: don't use SortField.AUTO; deprecate LineDocMaker & EnwikiDocMaker
Added:
lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java
- copied, changed from r797694, lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocMakerTest.java
Removed:
lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocMakerTest.java
Modified:
lucene/java/trunk/contrib/benchmark/CHANGES.txt
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiDocMaker.java
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocMaker.java
lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java
lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java
Modified: lucene/java/trunk/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/CHANGES.txt?rev=798096&r1=798095&r2=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/benchmark/CHANGES.txt Mon Jul 27 10:15:03 2009
@@ -4,6 +4,16 @@
$Id:$
+7/24/2009
+ LUCENE-1595: Deprecate LineDocMaker and EnwikiDocMaker in favor of
+ using DocMaker directly, with content.source = LineDocSource or
+ EnwikiContentSource. NOTE: with this change, the "id" field from
+ the Wikipedia XML export is now indexed as the "docname" field
+ (previously it was indexed as "docid"). Additionally, the
+ SearchWithSort task now accepts all types that SortField can accept
+ and no longer falls back to SortField.AUTO, which has been
+ deprecated. (Mike McCandless)
+
7/20/2009
LUCENE-1755: Fix WriteLineDocTask to output a document if it contains either
a title or body (or both). (Shai Erera via Mark Miller)
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java?rev=798096&r1=798095&r2=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java Mon Jul 27 10:15:03 2009
@@ -24,6 +24,7 @@
import java.util.Map;
import java.util.Properties;
import java.util.Map.Entry;
+import java.util.Random;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.Format;
@@ -60,6 +61,9 @@
* <li><b>doc.reuse.fields</b> - specifies whether Field and Document objects
* should be reused (default <b>true</b>).
* <li><b>doc.index.props</b> - specifies whether the properties returned by
* {@link DocData#getProps()} will be indexed. (default <b>false</b>).
+ * <li><b>doc.random.id.limit</b> - if specified, docs will be assigned random
+ * IDs from 0 to this limit. This is useful with UpdateDoc
+ * for testing performance of IndexWriter.updateDocument.
* </ul>
*/
@@ -70,11 +74,14 @@
private int cnt;
}
+ private Random r;
+ private int updateDocIDLimit;
+
static class DocState {
- private Map fields;
- private boolean reuseFields;
- Document doc;
+ private final Map fields;
+ private final boolean reuseFields;
+ final Document doc;
DocData docData = new DocData();
public DocState(boolean reuseFields, Store store, Index index, Index bodyIndex, TermVector termVector) {
@@ -92,6 +99,9 @@
fields.put(NAME_FIELD, new Field(NAME_FIELD, "", store, index, termVector));
doc = new Document();
+ } else {
+ fields = null;
+ doc = null;
}
}
@@ -150,14 +160,14 @@
// use only part of the body, modify it to keep the rest (or use all if size==0).
// reset the docdata properties so they are not added more than once.
private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException {
- int docid = incrNumDocsCreated();
- DocState ds = reuseFields ? getDocState() : localDocState;
- Document doc = reuseFields ? ds.doc : new Document();
+
+ final DocState ds = reuseFields ? getDocState() : localDocState;
+ final Document doc = reuseFields ? ds.doc : new Document();
doc.getFields().clear();
// Set ID_FIELD
Field idField = ds.getField(ID_FIELD, storeVal, Index.NOT_ANALYZED_NO_NORMS, termVecVal);
- idField.setValue("doc" + docid);
+ idField.setValue("doc" + (r != null ? r.nextInt(updateDocIDLimit) : incrNumDocsCreated()));
doc.add(idField);
// Set NAME_FIELD
@@ -407,6 +417,11 @@
}
indexProperties = config.get("doc.index.props", false);
+
+ updateDocIDLimit = config.get("doc.random.id.limit", -1);
+ if (updateDocIDLimit != -1) {
+ r = new Random(179);
+ }
}
}
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiDocMaker.java?rev=798096&r1=798095&r2=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiDocMaker.java Mon Jul 27 10:15:03 2009
@@ -28,43 +28,14 @@
* A {@link DocMaker} which reads the English Wikipedia dump. Uses
* {@link EnwikiContentSource} as its content source, regardless if a different
* content source was defined in the configuration.
+ * @deprecated Please use {@link DocMaker} instead, with content.source=EnwikiContentSource
*/
public class EnwikiDocMaker extends DocMaker {
-
- public Document makeDocument() throws Exception {
- DocState ds = reuseFields ? getDocState() : localDocState;
- DocData dd = source.getNextDocData(ds.docData);
- Document doc = reuseFields ? ds.doc : new Document();
- doc.getFields().clear();
-
- Field body = ds.getField(BODY_FIELD, storeVal, bodyIndexVal, termVecVal);
- body.setValue(dd.getBody());
- doc.add(body);
-
- Field title = ds.getField(TITLE_FIELD, storeVal, indexVal, termVecVal);
- title.setValue(dd.getTitle());
- doc.add(title);
-
- Field date = ds.getField(DATE_FIELD, storeVal, indexVal, termVecVal);
- date.setValue(dd.getDate());
- doc.add(date);
-
- Field id = ds.getField(ID_FIELD, Store.YES, Index.NOT_ANALYZED_NO_NORMS, TermVector.NO);
- id.setValue(dd.getName());
- doc.add(id);
-
- return doc;
- }
-
- public Document makeDocument(int size) throws Exception {
- throw new RuntimeException("cannot change document size with EnwikiDocMaker");
- }
-
public void setConfig(Config config) {
super.setConfig(config);
// Override whatever content source was set in the config
source = new EnwikiContentSource();
source.setConfig(config);
+ System.out.println("NOTE: EnwikiDocMaker is deprecated; please use DocMaker instead (which is the default if you don't specify doc.maker) with content.source=EnwikiContentSource");
}
-
-}
\ No newline at end of file
+}
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocMaker.java?rev=798096&r1=798095&r2=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocMaker.java Mon Jul 27 10:15:03 2009
@@ -42,51 +42,13 @@
* 0..N; this is useful with UpdateDoc to test updating random documents; if
* this is unspecified or -1, then docid is sequentially assigned
* </ul>
+ * @deprecated Please use {@link DocMaker} instead, with content.source=LineDocSource
*/
public class LineDocMaker extends DocMaker {
-
- private Random r;
- private int numDocs;
-
- public Document makeDocument() throws Exception {
-
- DocState ds = reuseFields ? getDocState() : localDocState;
- DocData dd = source.getNextDocData(ds.docData);
- Document doc = reuseFields ? ds.doc : new Document();
- doc.getFields().clear();
-
- Field body = ds.getField(BODY_FIELD, storeVal, bodyIndexVal, termVecVal);
- body.setValue(dd.getBody());
- doc.add(body);
-
- Field title = ds.getField(TITLE_FIELD, storeVal, indexVal, termVecVal);
- title.setValue(dd.getTitle());
- doc.add(title);
-
- Field date = ds.getField(DATE_FIELD, storeVal, indexVal, termVecVal);
- date.setValue(dd.getDate());
- doc.add(date);
-
- String docID = "doc" + (r != null ? r.nextInt(numDocs) : incrNumDocsCreated());
- Field id = ds.getField(ID_FIELD, Store.YES, Index.NOT_ANALYZED_NO_NORMS, TermVector.NO);
- id.setValue(docID);
- doc.add(id);
-
- return doc;
- }
-
- public Document makeDocument(int size) throws Exception {
- throw new RuntimeException("cannot change document size with LineDocMaker");
- }
-
public void setConfig(Config config) {
super.setConfig(config);
source = new LineDocSource();
source.setConfig(config);
- numDocs = config.get("doc.random.id.limit", -1);
- if (numDocs != -1) {
- r = new Random(179);
- }
+ System.out.println("NOTE: LineDocMaker is deprecated; please use DocMaker instead (which is the default if you don't specify doc.maker) with content.source=LineDocSource");
}
-
}
Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java?rev=798096&r1=798095&r2=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java Mon Jul 27 10:15:03 2009
@@ -56,6 +56,8 @@
SortField sortField0;
if (field.equals("doc")) {
sortField0 = SortField.FIELD_DOC;
+ } if (field.equals("score")) {
+ sortField0 = SortField.FIELD_SCORE;
} else if (field.equals("noscore")) {
doScore = false;
continue;
@@ -90,14 +92,22 @@
int type;
if (typeString.equals("float")) {
type = SortField.FLOAT;
+ } else if (typeString.equals("double")) {
+ type = SortField.DOUBLE;
+ } else if (typeString.equals("byte")) {
+ type = SortField.BYTE;
+ } else if (typeString.equals("short")) {
+ type = SortField.SHORT;
} else if (typeString.equals("int")) {
type = SortField.INT;
+ } else if (typeString.equals("long")) {
+ type = SortField.LONG;
} else if (typeString.equals("string")) {
type = SortField.STRING;
} else if (typeString.equals("string_val")) {
type = SortField.STRING_VAL;
} else {
- type = SortField.AUTO;
+ throw new RuntimeException("Unrecognized sort field type " + typeString);
}
return type;
}
Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=798096&r1=798095&r2=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Mon Jul 27 10:15:03 2009
@@ -299,7 +299,7 @@
}
/**
- * Test WriteLineDoc and LineDocMaker.
+ * Test WriteLineDoc and LineDocSource.
*/
public void testLineDocFile() throws Exception {
File lineFile = new File(System.getProperty("tempDir"), "test.reuters.lines.txt");
@@ -334,7 +334,7 @@
String algLines2[] = {
"# ----- properties ",
"analyzer=org.apache.lucene.analysis.SimpleAnalyzer",
- "doc.maker=org.apache.lucene.benchmark.byTask.feeds.LineDocMaker",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"docs.file=" + lineFile.getAbsolutePath().replace('\\', '/'),
"content.source.forever=false",
"doc.reuse.fields=false",
@@ -355,7 +355,7 @@
iw.close();
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
- assertEquals(numLines + " lines were were created but " + ir.numDocs() + " docs are in the index", numLines, ir.numDocs());
+ assertEquals(numLines + " lines were created but " + ir.numDocs() + " docs are in the index", numLines, ir.numDocs());
ir.close();
lineFile.delete();
Copied: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java (from r797694, lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocMakerTest.java)
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java?p2=lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java&p1=lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocMakerTest.java&r1=797694&r2=798096&rev=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocMakerTest.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java Mon Jul 27 10:15:03 2009
@@ -39,8 +39,8 @@
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
-/** Tests the functionality of {@link LineDocMaker}. */
-public class LineDocMakerTest extends BenchmarkTestCase {
+/** Tests the functionality of {@link LineDocSource}. */
+public class LineDocSourceTest extends BenchmarkTestCase {
private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();
@@ -70,7 +70,7 @@
Properties props = new Properties();
- // LineDocMaker specific settings.
+ // LineDocSource specific settings.
props.setProperty("docs.file", file.getAbsolutePath());
if (setBZCompress) {
props.setProperty("bzip.compression", bz2CompressVal);
@@ -78,7 +78,7 @@
// Indexing configuration.
props.setProperty("analyzer", SimpleAnalyzer.class.getName());
- props.setProperty("doc.maker", LineDocMaker.class.getName());
+ props.setProperty("content.source", LineDocSource.class.getName());
props.setProperty("directory", "RAMDirectory");
// Create PerfRunData
@@ -98,7 +98,7 @@
searcher.close();
}
- /* Tests LineDocMaker with a bzip2 input stream. */
+ /* Tests LineDocSource with a bzip2 input stream. */
public void testBZip2() throws Exception {
File file = new File(getWorkDir(), "one-line.bz2");
createBZ2LineFile(file);
Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java?rev=798096&r1=798095&r2=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java Mon Jul 27 10:15:03 2009
@@ -172,7 +172,7 @@
public void testCharsReplace() throws Exception {
// WriteLineDocTask replaced only \t characters w/ a space, since that's its
// separator char. However, it didn't replace newline characters, which
- // resulted in errors in LineDocMaker.
+ // resulted in errors in LineDocSource.
File file = new File(getWorkDir(), "one-line");
PerfRunData runData = createPerfRunData(file, false, null, NewLinesDocMaker.class.getName());
WriteLineDocTask wldt = new WriteLineDocTask(runData);