You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/07/27 12:15:04 UTC

svn commit: r798096 - in /lucene/java/trunk/contrib/benchmark: ./ src/java/org/apache/lucene/benchmark/byTask/feeds/ src/java/org/apache/lucene/benchmark/byTask/tasks/ src/test/org/apache/lucene/benchmark/byTask/ src/test/org/apache/lucene/benchmark/by...

Author: mikemccand
Date: Mon Jul 27 10:15:03 2009
New Revision: 798096

URL: http://svn.apache.org/viewvc?rev=798096&view=rev
Log:
LUCENE-1595: don't use SortField.AUTO; deprecate LineDocMaker & EnwikiDocMaker

Added:
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java
      - copied, changed from r797694, lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocMakerTest.java
Removed:
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocMakerTest.java
Modified:
    lucene/java/trunk/contrib/benchmark/CHANGES.txt
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiDocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocMaker.java
    lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
    lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java

Modified: lucene/java/trunk/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/CHANGES.txt?rev=798096&r1=798095&r2=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/benchmark/CHANGES.txt Mon Jul 27 10:15:03 2009
@@ -4,6 +4,16 @@
 
 $Id:$
 
+7/24/2009
+  LUCENE-1595: Deprecate LineDocMaker and EnwikiDocMaker in favor of
+  using DocMaker directly, with content.source = LineDocSource or
+  EnwikiContentSource.  NOTE: with this change, the "id" field from
+  the Wikipedia XML export is now indexed as the "docname" field
+  (previously it was indexed as "docid").  Additionally, the
+  SearchWithSort task now accepts all types that SortField can accept
+  and no longer falls back to SortField.AUTO, which has been
+  deprecated. (Mike McCandless)
+
 7/20/2009
   LUCENE-1755: Fix WriteLineDocTask to output a document if it contains either 
   a title or body (or both).  (Shai Erera via Mark Miller)

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java?rev=798096&r1=798095&r2=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java Mon Jul 27 10:15:03 2009
@@ -24,6 +24,7 @@
 import java.util.Map;
 import java.util.Properties;
 import java.util.Map.Entry;
+import java.util.Random;
 
 import org.apache.lucene.benchmark.byTask.utils.Config;
 import org.apache.lucene.benchmark.byTask.utils.Format;
@@ -60,6 +61,9 @@
  * <li><b>doc.reuse.fields</b> - specifies whether Field and Document objects
  * should be reused (default <b>true</b>).
  * <li><b>doc.index.props</b> - specifies whether the properties returned by
  * {@link DocData#getProps()} will be indexed. (default <b>false</b>).
+ * <li><b>doc.random.id.limit</b> - if specified, docs will be assigned random
+ * IDs from 0 to this limit.  This is useful with UpdateDoc
+ * for testing performance of IndexWriter.updateDocument.
  * </ul>
  */
@@ -70,11 +74,14 @@
     private int cnt;
   }
 
+  private Random r;
+  private int updateDocIDLimit;
+
   static class DocState {
     
-    private Map fields;
-    private boolean reuseFields;
-    Document doc;
+    private final Map fields;
+    private final boolean reuseFields;
+    final Document doc;
     DocData docData = new DocData();
     
     public DocState(boolean reuseFields, Store store, Index index, Index bodyIndex, TermVector termVector) {
@@ -92,6 +99,9 @@
         fields.put(NAME_FIELD, new Field(NAME_FIELD, "", store, index, termVector));
         
         doc = new Document();
+      } else {
+        fields = null;
+        doc = null;
       }
     }
 
@@ -150,14 +160,14 @@
   // use only part of the body, modify it to keep the rest (or use all if size==0).
   // reset the docdata properties so they are not added more than once.
   private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException {
-    int docid = incrNumDocsCreated();
-    DocState ds = reuseFields ? getDocState() : localDocState;
-    Document doc = reuseFields ? ds.doc : new Document();
+
+    final DocState ds = reuseFields ? getDocState() : localDocState;
+    final Document doc = reuseFields ? ds.doc : new Document();
     doc.getFields().clear();
     
     // Set ID_FIELD
     Field idField = ds.getField(ID_FIELD, storeVal, Index.NOT_ANALYZED_NO_NORMS, termVecVal);
-    idField.setValue("doc" + docid);
+    idField.setValue("doc" + (r != null ? r.nextInt(updateDocIDLimit) : incrNumDocsCreated()));
     doc.add(idField);
     
     // Set NAME_FIELD
@@ -407,6 +417,11 @@
     }
     
     indexProperties = config.get("doc.index.props", false);
+
+    updateDocIDLimit = config.get("doc.random.id.limit", -1);
+    if (updateDocIDLimit != -1) {
+      r = new Random(179);
+    }
   }
 
 }

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiDocMaker.java?rev=798096&r1=798095&r2=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiDocMaker.java Mon Jul 27 10:15:03 2009
@@ -28,43 +28,14 @@
  * A {@link DocMaker} which reads the English Wikipedia dump. Uses
  * {@link EnwikiContentSource} as its content source, regardless if a different
  * content source was defined in the configuration.
+ * @deprecated Please use {@link DocMaker} instead, with content.source=EnwikiContentSource
  */
 public class EnwikiDocMaker extends DocMaker {
-  
-  public Document makeDocument() throws Exception {
-    DocState ds = reuseFields ? getDocState() : localDocState;
-    DocData dd = source.getNextDocData(ds.docData);
-    Document doc = reuseFields ? ds.doc : new Document();
-    doc.getFields().clear();
-
-    Field body = ds.getField(BODY_FIELD, storeVal, bodyIndexVal, termVecVal);
-    body.setValue(dd.getBody());
-    doc.add(body);
-    
-    Field title = ds.getField(TITLE_FIELD, storeVal, indexVal, termVecVal);
-    title.setValue(dd.getTitle());
-    doc.add(title);
-    
-    Field date = ds.getField(DATE_FIELD, storeVal, indexVal, termVecVal);
-    date.setValue(dd.getDate());
-    doc.add(date);
-    
-    Field id = ds.getField(ID_FIELD, Store.YES, Index.NOT_ANALYZED_NO_NORMS, TermVector.NO);
-    id.setValue(dd.getName());
-    doc.add(id);
-    
-    return doc;
-  }
-
-  public Document makeDocument(int size) throws Exception {
-    throw new RuntimeException("cannot change document size with EnwikiDocMaker");
-  }
-
   public void setConfig(Config config) {
     super.setConfig(config);
     // Override whatever content source was set in the config
     source = new EnwikiContentSource();
     source.setConfig(config);
+    System.out.println("NOTE: EnwikiDocMaker is deprecated; please use DocMaker instead (which is the default if you don't specify doc.maker) with content.source=EnwikiContentSource");
   }
-  
-}
\ No newline at end of file
+}

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocMaker.java?rev=798096&r1=798095&r2=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocMaker.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocMaker.java Mon Jul 27 10:15:03 2009
@@ -42,51 +42,13 @@
  * 0..N; this is useful with UpdateDoc to test updating random documents; if
  * this is unspecified or -1, then docid is sequentially assigned
  * </ul>
+ * @deprecated Please use {@link DocMaker} instead, with content.source=LineDocSource
  */
 public class LineDocMaker extends DocMaker {
-
-  private Random r;
-  private int numDocs;
-
-  public Document makeDocument() throws Exception {
-
-    DocState ds = reuseFields ? getDocState() : localDocState;
-    DocData dd = source.getNextDocData(ds.docData);
-    Document doc = reuseFields ? ds.doc : new Document();
-    doc.getFields().clear();
-
-    Field body = ds.getField(BODY_FIELD, storeVal, bodyIndexVal, termVecVal);
-    body.setValue(dd.getBody());
-    doc.add(body);
-    
-    Field title = ds.getField(TITLE_FIELD, storeVal, indexVal, termVecVal);
-    title.setValue(dd.getTitle());
-    doc.add(title);
-    
-    Field date = ds.getField(DATE_FIELD, storeVal, indexVal, termVecVal);
-    date.setValue(dd.getDate());
-    doc.add(date);
-    
-    String docID = "doc" + (r != null ? r.nextInt(numDocs) : incrNumDocsCreated());
-    Field id = ds.getField(ID_FIELD, Store.YES, Index.NOT_ANALYZED_NO_NORMS, TermVector.NO);
-    id.setValue(docID);
-    doc.add(id);
-    
-    return doc;
-  }
-
-  public Document makeDocument(int size) throws Exception {
-    throw new RuntimeException("cannot change document size with LineDocMaker");
-  }
-  
   public void setConfig(Config config) {
     super.setConfig(config);
     source = new LineDocSource();
     source.setConfig(config);
-    numDocs = config.get("doc.random.id.limit", -1);
-    if (numDocs != -1) {
-      r = new Random(179);
-    }
+    System.out.println("NOTE: LineDocMaker is deprecated; please use DocMaker instead (which is the default if you don't specify doc.maker) with content.source=LineDocSource");
   }
-
 }

Modified: lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java?rev=798096&r1=798095&r2=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java Mon Jul 27 10:15:03 2009
@@ -56,6 +56,8 @@
       SortField sortField0;
       if (field.equals("doc")) {
         sortField0 = SortField.FIELD_DOC;
+      } if (field.equals("score")) {
+        sortField0 = SortField.FIELD_SCORE;
       } else if (field.equals("noscore")) {
         doScore = false;
         continue;
@@ -90,14 +92,22 @@
     int type;
     if (typeString.equals("float")) {
       type = SortField.FLOAT;
+    } else if (typeString.equals("double")) {
+      type = SortField.DOUBLE;
+    } else if (typeString.equals("byte")) {
+      type = SortField.BYTE;
+    } else if (typeString.equals("short")) {
+      type = SortField.SHORT;
     } else if (typeString.equals("int")) {
       type = SortField.INT;
+    } else if (typeString.equals("long")) {
+      type = SortField.LONG;
     } else if (typeString.equals("string")) {
       type = SortField.STRING;
     } else if (typeString.equals("string_val")) {
       type = SortField.STRING_VAL;
     } else {
-      type = SortField.AUTO;
+      throw new RuntimeException("Unrecognized sort field type " + typeString);
     }
     return type;
   }

Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=798096&r1=798095&r2=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Mon Jul 27 10:15:03 2009
@@ -299,7 +299,7 @@
   }
 
   /**
-   * Test WriteLineDoc and LineDocMaker.
+   * Test WriteLineDoc and LineDocSource.
    */
   public void testLineDocFile() throws Exception {
     File lineFile = new File(System.getProperty("tempDir"), "test.reuters.lines.txt");
@@ -334,7 +334,7 @@
     String algLines2[] = {
       "# ----- properties ",
       "analyzer=org.apache.lucene.analysis.SimpleAnalyzer",
-      "doc.maker=org.apache.lucene.benchmark.byTask.feeds.LineDocMaker",
+      "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
       "docs.file=" + lineFile.getAbsolutePath().replace('\\', '/'),
       "content.source.forever=false",
       "doc.reuse.fields=false",
@@ -355,7 +355,7 @@
     iw.close();
 
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
-    assertEquals(numLines + " lines were were created but " + ir.numDocs() + " docs are in the index", numLines, ir.numDocs());
+    assertEquals(numLines + " lines were created but " + ir.numDocs() + " docs are in the index", numLines, ir.numDocs());
     ir.close();
 
     lineFile.delete();

Copied: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java (from r797694, lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocMakerTest.java)
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java?p2=lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java&p1=lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocMakerTest.java&r1=797694&r2=798096&rev=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocMakerTest.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/LineDocSourceTest.java Mon Jul 27 10:15:03 2009
@@ -39,8 +39,8 @@
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 
-/** Tests the functionality of {@link LineDocMaker}. */
-public class LineDocMakerTest extends BenchmarkTestCase {
+/** Tests the functionality of {@link LineDocSource}. */
+public class LineDocSourceTest extends BenchmarkTestCase {
 
   private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();
 
@@ -70,7 +70,7 @@
 
     Properties props = new Properties();
     
-    // LineDocMaker specific settings.
+    // LineDocSource specific settings.
     props.setProperty("docs.file", file.getAbsolutePath());
     if (setBZCompress) {
       props.setProperty("bzip.compression", bz2CompressVal);
@@ -78,7 +78,7 @@
     
     // Indexing configuration.
     props.setProperty("analyzer", SimpleAnalyzer.class.getName());
-    props.setProperty("doc.maker", LineDocMaker.class.getName());
+    props.setProperty("content.source", LineDocSource.class.getName());
     props.setProperty("directory", "RAMDirectory");
     
     // Create PerfRunData
@@ -98,7 +98,7 @@
     searcher.close();
   }
   
-  /* Tests LineDocMaker with a bzip2 input stream. */
+  /* Tests LineDocSource with a bzip2 input stream. */
   public void testBZip2() throws Exception {
     File file = new File(getWorkDir(), "one-line.bz2");
     createBZ2LineFile(file);

Modified: lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java?rev=798096&r1=798095&r2=798096&view=diff
==============================================================================
--- lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java (original)
+++ lucene/java/trunk/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java Mon Jul 27 10:15:03 2009
@@ -172,7 +172,7 @@
   public void testCharsReplace() throws Exception {
     // WriteLineDocTask replaced only \t characters w/ a space, since that's its
     // separator char. However, it didn't replace newline characters, which
-    // resulted in errors in LineDocMaker.
+    // resulted in errors in LineDocSource.
     File file = new File(getWorkDir(), "one-line");
     PerfRunData runData = createPerfRunData(file, false, null, NewLinesDocMaker.class.getName());
     WriteLineDocTask wldt = new WriteLineDocTask(runData);