You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2011/01/24 18:03:12 UTC
svn commit: r1062881 - in
/lucene/java/branches/lucene_3_0/contrib/benchmark/src:
java/org/apache/lucene/benchmark/byTask/feeds/
test/org/apache/lucene/benchmark/quality/
Author: mikemccand
Date: Mon Jan 24 17:03:11 2011
New Revision: 1062881
URL: http://svn.apache.org/viewvc?rev=1062881&view=rev
Log:
LUCENE-2826: backport to 3.0
Modified:
lucene/java/branches/lucene_3_0/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java
lucene/java/branches/lucene_3_0/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
lucene/java/branches/lucene_3_0/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
lucene/java/branches/lucene_3_0/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecQRels.txt
Modified: lucene/java/branches/lucene_3_0/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_3_0/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java?rev=1062881&r1=1062880&r2=1062881&view=diff
==============================================================================
--- lucene/java/branches/lucene_3_0/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java (original)
+++ lucene/java/branches/lucene_3_0/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java Mon Jan 24 17:03:11 2011
@@ -29,6 +29,7 @@ public class DocData {
private String body;
private String title;
private String date;
+ private int id;
private Properties props;
public void clear() {
@@ -37,6 +38,7 @@ public class DocData {
title = null;
date = null;
props = null;
+ id = -1;
}
public String getBody() {
@@ -57,6 +59,10 @@ public class DocData {
return name;
}
+ public int getID() {
+ return id;
+ }
+
public Properties getProps() {
return props;
}
@@ -85,6 +91,10 @@ public class DocData {
this.name = name;
}
+ public void setID(int id) {
+ this.id = id;
+ }
+
public void setProps(Properties props) {
this.props = props;
}
Modified: lucene/java/branches/lucene_3_0/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_3_0/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java?rev=1062881&r1=1062880&r2=1062881&view=diff
==============================================================================
--- lucene/java/branches/lucene_3_0/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (original)
+++ lucene/java/branches/lucene_3_0/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java Mon Jan 24 17:03:11 2011
@@ -20,14 +20,21 @@ package org.apache.lucene.benchmark.byTa
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
+import java.util.Calendar;
import java.util.Map;
import java.util.Properties;
+import java.util.Locale;
import java.util.Random;
+import java.util.Date;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.text.SimpleDateFormat;
+import java.text.ParsePosition;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.Format;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
@@ -82,6 +89,7 @@ public class DocMaker {
static class DocState {
private final Map<String,Field> fields;
+ private final Map<String,NumericField> numericFields;
private final boolean reuseFields;
final Document doc;
DocData docData = new DocData();
@@ -92,6 +100,7 @@ public class DocMaker {
if (reuseFields) {
fields = new HashMap<String,Field>();
+ numericFields = new HashMap<String,NumericField>();
// Initialize the map with the default fields.
fields.put(BODY_FIELD, new Field(BODY_FIELD, "", bodyStore, bodyIndex, termVector));
@@ -99,9 +108,13 @@ public class DocMaker {
fields.put(DATE_FIELD, new Field(DATE_FIELD, "", store, index, termVector));
fields.put(ID_FIELD, new Field(ID_FIELD, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
fields.put(NAME_FIELD, new Field(NAME_FIELD, "", store, index, termVector));
+
+ numericFields.put(DATE_MSEC_FIELD, new NumericField(DATE_MSEC_FIELD));
+ numericFields.put(TIME_SEC_FIELD, new NumericField(TIME_SEC_FIELD));
doc = new Document();
} else {
+ numericFields = null;
fields = null;
doc = null;
}
@@ -124,18 +137,42 @@ public class DocMaker {
}
return f;
}
+
+ NumericField getNumericField(String name) {
+ if (!reuseFields) {
+ return new NumericField(name);
+ }
+
+ NumericField f = numericFields.get(name);
+ if (f == null) {
+ f = new NumericField(name);
+ numericFields.put(name, f);
+ }
+ return f;
+ }
}
- private int numDocsCreated = 0;
private boolean storeBytes = false;
+ private static class DateUtil {
+ public SimpleDateFormat parser = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.US);
+ public Calendar cal = Calendar.getInstance();
+ public ParsePosition pos = new ParsePosition(0);
+ public DateUtil() {
+ parser.setLenient(true);
+ }
+ }
+
// leftovers are thread local, because it is unsafe to share residues between threads
private ThreadLocal<LeftOver> leftovr = new ThreadLocal<LeftOver>();
private ThreadLocal<DocState> docState = new ThreadLocal<DocState>();
+ private ThreadLocal<DateUtil> dateParsers = new ThreadLocal<DateUtil>();
public static final String BODY_FIELD = "body";
public static final String TITLE_FIELD = "doctitle";
public static final String DATE_FIELD = "docdate";
+ public static final String DATE_MSEC_FIELD = "docdatenum";
+ public static final String TIME_SEC_FIELD = "doctimesecnum";
public static final String ID_FIELD = "docid";
public static final String BYTES_FIELD = "bytes";
public static final String NAME_FIELD = "docname";
@@ -155,6 +192,7 @@ public class DocMaker {
private int lastPrintedNumUniqueTexts = 0;
private long lastPrintedNumUniqueBytes = 0;
+ private final AtomicInteger numDocsCreated = new AtomicInteger();
private int printNum = 0;
@@ -169,7 +207,16 @@ public class DocMaker {
// Set ID_FIELD
Field idField = ds.getField(ID_FIELD, storeVal, Index.NOT_ANALYZED_NO_NORMS, termVecVal);
- idField.setValue("doc" + (r != null ? r.nextInt(updateDocIDLimit) : incrNumDocsCreated()));
+ int id;
+ if (r != null) {
+ id = r.nextInt(updateDocIDLimit);
+ } else {
+ id = docData.getID();
+ if (id == -1) {
+ id = numDocsCreated.getAndIncrement();
+ }
+ }
+ idField.setValue(Integer.toString(id));
doc.add(idField);
// Set NAME_FIELD
@@ -181,13 +228,39 @@ public class DocMaker {
doc.add(nameField);
// Set DATE_FIELD
- String date = docData.getDate();
+ DateUtil util = dateParsers.get();
+ if (util == null) {
+ util = new DateUtil();
+ dateParsers.set(util);
+ }
+ Date date = null;
+ String dateString = docData.getDate();
+ if (dateString != null) {
+ util.pos.setIndex(0);
+ date = util.parser.parse(dateString, util.pos);
+ //System.out.println(dateString + " parsed to " + date);
+ } else {
+ dateString = "";
+ }
+ Field dateStringField = ds.getField(DATE_FIELD, storeVal, indexVal, termVecVal);
+ dateStringField.setValue(dateString);
+ doc.add(dateStringField);
+
if (date == null) {
- date = "";
+ // just set to right now
+ date = new Date();
}
- Field dateField = ds.getField(DATE_FIELD, storeVal, indexVal, termVecVal);
- dateField.setValue(date);
+
+ NumericField dateField = ds.getNumericField(DATE_MSEC_FIELD);
+ dateField.setLongValue(date.getTime());
doc.add(dateField);
+
+ util.cal.setTime(date);
+ final int sec = util.cal.get(Calendar.HOUR_OF_DAY)*3600 + util.cal.get(Calendar.MINUTE)*60 + util.cal.get(Calendar.SECOND);
+
+ NumericField timeSecField = ds.getNumericField(TIME_SEC_FIELD);
+ timeSecField.setIntValue(sec);
+ doc.add(timeSecField);
// Set TITLE_FIELD
String title = docData.getTitle();
@@ -252,10 +325,6 @@ public class DocMaker {
return ds;
}
- protected synchronized int incrNumDocsCreated() {
- return numDocsCreated++;
- }
-
/**
* Closes the {@link DocMaker}. The base implementation closes the
* {@link ContentSource}, and it can be overridden to do more work (but make
@@ -331,9 +400,9 @@ public class DocMaker {
public void printDocStatistics() {
boolean print = false;
String col = " ";
- StringBuffer sb = new StringBuffer();
+ StringBuilder sb = new StringBuilder();
String newline = System.getProperty("line.separator");
- sb.append("------------> ").append(Format.simpleName(getClass())).append(" statistics (").append(printNum).append("): ").append(newline);
+ sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline);
int nut = source.getTotalDocsCount();
if (nut > lastPrintedNumUniqueTexts) {
print = true;
@@ -363,7 +432,7 @@ public class DocMaker {
// re-initiate since properties by round may have changed.
setConfig(config);
source.resetInputs();
- numDocsCreated = 0;
+ numDocsCreated.set(0);
resetLeftovers();
}
Modified: lucene/java/branches/lucene_3_0/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_3_0/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java?rev=1062881&r1=1062880&r2=1062881&view=diff
==============================================================================
--- lucene/java/branches/lucene_3_0/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java (original)
+++ lucene/java/branches/lucene_3_0/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java Mon Jan 24 17:03:11 2011
@@ -48,6 +48,7 @@ public class LineDocSource extends Conte
private File file;
private BufferedReader reader;
+ private int readCount;
private synchronized void openFile() {
try {
@@ -71,9 +72,12 @@ public class LineDocSource extends Conte
@Override
public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
- String line;
+ final String line;
+ final int myID;
+
synchronized(this) {
line = reader.readLine();
+ myID = readCount++;
if (line == null) {
if (!forever) {
throw new NoMoreDataException();
@@ -96,6 +100,7 @@ public class LineDocSource extends Conte
}
// The date String was written in the format of DateTools.dateToString.
docData.clear();
+ docData.setID(myID);
docData.setBody(line.substring(1 + spot2, line.length()));
docData.setTitle(line.substring(0, spot));
docData.setDate(line.substring(1 + spot, spot2));
Modified: lucene/java/branches/lucene_3_0/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecQRels.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_3_0/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecQRels.txt?rev=1062881&r1=1062880&r2=1062881&view=diff
==============================================================================
--- lucene/java/branches/lucene_3_0/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecQRels.txt (original)
+++ lucene/java/branches/lucene_3_0/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecQRels.txt Mon Jan 24 17:03:11 2011
@@ -40,64 +40,64 @@
0 0 fakedoc3 1
0 0 fakedoc4 1
-0 0 doc18211 1
-0 0 doc20192 1
-0 0 doc7401 1
-0 0 doc11285 1
-0 0 doc20647 1
-0 0 doc3057 1
-0 0 doc12431 1
-0 0 doc4989 1
-0 0 doc17324 1
-0 0 doc4030 1
-0 0 doc4290 1
-0 0 doc3462 1
-0 0 doc15313 1
-0 0 doc10303 1
-0 0 doc1893 1
-0 0 doc5008 1
-0 0 doc14634 1
-0 0 doc5471 1
-0 0 doc17904 1
-0 0 doc7168 1
-0 0 doc21275 1
-0 0 doc9011 1
-0 0 doc17546 1
-0 0 doc9102 1
-0 0 doc13199 1
+0 0 18211 1
+0 0 20192 1
+0 0 7401 1
+0 0 11285 1
+0 0 20647 1
+0 0 3057 1
+0 0 12431 1
+0 0 4989 1
+0 0 17324 1
+0 0 4030 1
+0 0 4290 1
+0 0 3462 1
+0 0 15313 1
+0 0 10303 1
+0 0 1893 1
+0 0 5008 1
+0 0 14634 1
+0 0 5471 1
+0 0 17904 1
+0 0 7168 1
+0 0 21275 1
+0 0 9011 1
+0 0 17546 1
+0 0 9102 1
+0 0 13199 1
# --- m==1: precision_at_n and avg_precision are hurt, by unmarking relevant docs
-1 0 doc9857 0
-1 0 doc16846 1
-1 0 doc4320 1
-1 0 doc9501 0
-1 0 doc10159 1
-1 0 doc16642 1
-1 0 doc17536 0
-1 0 doc17571 1
-1 0 doc18728 1
-1 0 doc18828 1
-1 0 doc19108 0
-1 0 doc9940 1
-1 0 doc11852 1
-1 0 doc7430 0
-1 0 doc19162 1
-1 0 doc1743 1
-1 0 doc2137 1
-1 0 doc7611 1
-1 0 doc8072 1
-1 0 doc12764 1
-1 0 doc2593 1
-1 0 doc11088 1
-1 0 doc931 1
-1 0 doc7673 1
-1 0 doc12941 1
-1 0 doc11797 1
-1 0 doc11831 1
-1 0 doc13162 1
-1 0 doc4423 1
-1 0 doc5217 1
+1 0 9857 0
+1 0 16846 1
+1 0 4320 1
+1 0 9501 0
+1 0 10159 1
+1 0 16642 1
+1 0 17536 0
+1 0 17571 1
+1 0 18728 1
+1 0 18828 1
+1 0 19108 0
+1 0 9940 1
+1 0 11852 1
+1 0 7430 0
+1 0 19162 1
+1 0 1743 1
+1 0 2137 1
+1 0 7611 1
+1 0 8072 1
+1 0 12764 1
+1 0 2593 1
+1 0 11088 1
+1 0 931 1
+1 0 7673 1
+1 0 12941 1
+1 0 11797 1
+1 0 11831 1
+1 0 13162 1
+1 0 4423 1
+1 0 5217 1
# ---- m==2: all precision, precision_at_n and recall are hurt.
@@ -106,200 +106,200 @@
2 0 fakedoc3 1
2 0 fakedoc4 1
-2 0 doc3137 0
-2 0 doc7142 0
-2 0 doc13667 0
-2 0 doc13171 0
-2 0 doc13372 1
-2 0 doc21415 1
-2 0 doc16298 1
-2 0 doc14957 1
-2 0 doc153 1
-2 0 doc16092 1
-2 0 doc16096 1
-2 0 doc21303 1
-2 0 doc18681 1
-2 0 doc20756 1
-2 0 doc355 1
-2 0 doc13395 1
-2 0 doc5009 1
-2 0 doc17164 1
-2 0 doc13162 1
-2 0 doc11757 1
-2 0 doc9637 1
-2 0 doc18087 1
-2 0 doc4593 1
-2 0 doc4677 1
-2 0 doc20865 1
-2 0 doc8556 1
-2 0 doc2578 1
-2 0 doc1163 1
-2 0 doc3797 1
-2 0 doc11094 1
-
-
-3 0 doc19578 1
-3 0 doc14860 1
-3 0 doc7235 1
-3 0 doc20590 1
-3 0 doc17933 1
-3 0 doc9384 1
-3 0 doc10783 1
-3 0 doc1963 1
-3 0 doc18356 1
-3 0 doc13254 1
-3 0 doc18402 1
-3 0 doc15241 1
-3 0 doc3303 1
-3 0 doc8868 1
-3 0 doc18520 1
-3 0 doc4650 1
-3 0 doc4727 1
-3 0 doc21518 1
-3 0 doc5060 1
-3 0 doc7587 1
-3 0 doc2990 1
-3 0 doc8042 1
-3 0 doc6304 1
-3 0 doc13223 1
-3 0 doc1964 1
-3 0 doc10597 1
-3 0 doc21023 1
-3 0 doc19057 1
-3 0 doc14948 1
-3 0 doc9692 1
-
-
-4 0 doc2534 1
-4 0 doc21388 1
-4 0 doc20923 1
-4 0 doc11547 1
-4 0 doc19755 1
-4 0 doc3793 1
-4 0 doc6714 1
-4 0 doc12722 1
-4 0 doc5552 1
-4 0 doc6810 1
-4 0 doc16953 1
-4 0 doc2527 1
-4 0 doc5361 1
-4 0 doc12353 1
-4 0 doc7308 1
-4 0 doc3836 1
-4 0 doc2293 1
-4 0 doc7348 1
-4 0 doc17119 1
-4 0 doc19331 1
-4 0 doc3411 1
-4 0 doc14643 1
-4 0 doc9058 1
-4 0 doc11099 1
-4 0 doc12485 1
-4 0 doc16432 1
-4 0 doc10047 1
-4 0 doc13788 1
-4 0 doc117 1
-4 0 doc638 1
-
-
-
-5 0 doc169 1
-5 0 doc13181 1
-5 0 doc4350 1
-5 0 doc10242 1
-5 0 doc955 1
-5 0 doc5389 1
-5 0 doc17122 1
-5 0 doc17417 1
-5 0 doc12199 1
-5 0 doc6918 1
-5 0 doc3857 1
-5 0 doc2981 1
-5 0 doc10639 1
-5 0 doc10478 1
-5 0 doc8573 1
-5 0 doc9197 1
-5 0 doc9298 1
-5 0 doc2492 1
-5 0 doc10262 1
-5 0 doc5180 1
-5 0 doc11758 1
-5 0 doc4065 1
-5 0 doc9124 1
-5 0 doc11528 1
-5 0 doc18879 1
-5 0 doc17864 1
-5 0 doc3204 1
-5 0 doc12157 1
-5 0 doc4496 1
-5 0 doc20190 1
-
-
-
-6 0 doc9507 1
-6 0 doc15630 1
-6 0 doc8469 1
-6 0 doc11918 1
-6 0 doc20482 1
-6 0 doc20158 1
-6 0 doc19831 1
-6 0 doc8296 1
-6 0 doc8930 1
-6 0 doc16460 1
-6 0 doc2577 1
-6 0 doc15476 1
-6 0 doc1767 1
-6 0 doc689 1
-6 0 doc16606 1
-6 0 doc6149 1
-6 0 doc18691 1
-6 0 doc2208 1
-6 0 doc3592 1
-6 0 doc11199 1
-6 0 doc16329 1
-6 0 doc6007 1
-6 0 doc15231 1
-6 0 doc20622 1
-6 0 doc21468 1
-6 0 doc12230 1
-6 0 doc5723 1
-6 0 doc8120 1
-6 0 doc8668 1
-6 0 doc303 1
-
-
-
-
-7 0 doc7728 1
-7 0 doc7693 1
-7 0 doc21088 1
-7 0 doc5017 1
-7 0 doc10807 1
-7 0 doc16204 1
-7 0 doc2233 1
-7 0 doc3632 1
-7 0 doc4719 1
-7 0 doc6477 1
-7 0 doc6502 1
-7 0 doc6709 1
-7 0 doc7710 1
-7 0 doc9193 1
-7 0 doc9309 1
-7 0 doc9789 1
-7 0 doc10971 1
-7 0 doc18059 1
-7 0 doc19906 1
-7 0 doc20089 1
-7 0 doc20102 1
-7 0 doc21040 1
-7 0 doc21153 1
-7 0 doc9147 1
-7 0 doc9930 1
-7 0 doc19763 1
-7 0 doc1559 1
-7 0 doc21248 1
-7 0 doc17945 1
-7 0 doc526 1
+2 0 3137 0
+2 0 7142 0
+2 0 13667 0
+2 0 13171 0
+2 0 13372 1
+2 0 21415 1
+2 0 16298 1
+2 0 14957 1
+2 0 153 1
+2 0 16092 1
+2 0 16096 1
+2 0 21303 1
+2 0 18681 1
+2 0 20756 1
+2 0 355 1
+2 0 13395 1
+2 0 5009 1
+2 0 17164 1
+2 0 13162 1
+2 0 11757 1
+2 0 9637 1
+2 0 18087 1
+2 0 4593 1
+2 0 4677 1
+2 0 20865 1
+2 0 8556 1
+2 0 2578 1
+2 0 1163 1
+2 0 3797 1
+2 0 11094 1
+
+
+3 0 19578 1
+3 0 14860 1
+3 0 7235 1
+3 0 20590 1
+3 0 17933 1
+3 0 9384 1
+3 0 10783 1
+3 0 1963 1
+3 0 18356 1
+3 0 13254 1
+3 0 18402 1
+3 0 15241 1
+3 0 3303 1
+3 0 8868 1
+3 0 18520 1
+3 0 4650 1
+3 0 4727 1
+3 0 21518 1
+3 0 5060 1
+3 0 7587 1
+3 0 2990 1
+3 0 8042 1
+3 0 6304 1
+3 0 13223 1
+3 0 1964 1
+3 0 10597 1
+3 0 21023 1
+3 0 19057 1
+3 0 14948 1
+3 0 9692 1
+
+
+4 0 2534 1
+4 0 21388 1
+4 0 20923 1
+4 0 11547 1
+4 0 19755 1
+4 0 3793 1
+4 0 6714 1
+4 0 12722 1
+4 0 5552 1
+4 0 6810 1
+4 0 16953 1
+4 0 2527 1
+4 0 5361 1
+4 0 12353 1
+4 0 7308 1
+4 0 3836 1
+4 0 2293 1
+4 0 7348 1
+4 0 17119 1
+4 0 19331 1
+4 0 3411 1
+4 0 14643 1
+4 0 9058 1
+4 0 11099 1
+4 0 12485 1
+4 0 16432 1
+4 0 10047 1
+4 0 13788 1
+4 0 117 1
+4 0 638 1
+
+
+
+5 0 169 1
+5 0 13181 1
+5 0 4350 1
+5 0 10242 1
+5 0 955 1
+5 0 5389 1
+5 0 17122 1
+5 0 17417 1
+5 0 12199 1
+5 0 6918 1
+5 0 3857 1
+5 0 2981 1
+5 0 10639 1
+5 0 10478 1
+5 0 8573 1
+5 0 9197 1
+5 0 9298 1
+5 0 2492 1
+5 0 10262 1
+5 0 5180 1
+5 0 11758 1
+5 0 4065 1
+5 0 9124 1
+5 0 11528 1
+5 0 18879 1
+5 0 17864 1
+5 0 3204 1
+5 0 12157 1
+5 0 4496 1
+5 0 20190 1
+
+
+
+6 0 9507 1
+6 0 15630 1
+6 0 8469 1
+6 0 11918 1
+6 0 20482 1
+6 0 20158 1
+6 0 19831 1
+6 0 8296 1
+6 0 8930 1
+6 0 16460 1
+6 0 2577 1
+6 0 15476 1
+6 0 1767 1
+6 0 689 1
+6 0 16606 1
+6 0 6149 1
+6 0 18691 1
+6 0 2208 1
+6 0 3592 1
+6 0 11199 1
+6 0 16329 1
+6 0 6007 1
+6 0 15231 1
+6 0 20622 1
+6 0 21468 1
+6 0 12230 1
+6 0 5723 1
+6 0 8120 1
+6 0 8668 1
+6 0 303 1
+
+
+
+
+7 0 7728 1
+7 0 7693 1
+7 0 21088 1
+7 0 5017 1
+7 0 10807 1
+7 0 16204 1
+7 0 2233 1
+7 0 3632 1
+7 0 4719 1
+7 0 6477 1
+7 0 6502 1
+7 0 6709 1
+7 0 7710 1
+7 0 9193 1
+7 0 9309 1
+7 0 9789 1
+7 0 10971 1
+7 0 18059 1
+7 0 19906 1
+7 0 20089 1
+7 0 20102 1
+7 0 21040 1
+7 0 21153 1
+7 0 9147 1
+7 0 9930 1
+7 0 19763 1
+7 0 1559 1
+7 0 21248 1
+7 0 17945 1
+7 0 526 1
# --- m==0: avg_precision and recall are hurt, by marking fake docs as relevant
@@ -309,71 +309,71 @@
8 0 fakedoc3 1
8 0 fakedoc4 1
-8 0 doc16299 1
-8 0 doc1662 1
-8 0 doc4585 1
-8 0 doc12315 1
-8 0 doc16266 1
-8 0 doc13136 1
-8 0 doc19212 1
-8 0 doc7086 1
-8 0 doc7062 1
-8 0 doc6134 1
-8 0 doc13953 1
-8 0 doc16264 1
-8 0 doc2494 1
-8 0 doc10636 1
-8 0 doc10894 1
-8 0 doc6844 1
-8 0 doc674 1
-8 0 doc13520 1
-8 0 doc344 1
-8 0 doc2896 1
-8 0 doc11871 1
-8 0 doc1862 1
-8 0 doc16728 1
-8 0 doc10308 1
-8 0 doc2227 1
-8 0 doc13167 1
-8 0 doc20607 1
-8 0 doc9670 1
-8 0 doc1566 1
-8 0 doc17885 1
+8 0 16299 1
+8 0 1662 1
+8 0 4585 1
+8 0 12315 1
+8 0 16266 1
+8 0 13136 1
+8 0 19212 1
+8 0 7086 1
+8 0 7062 1
+8 0 6134 1
+8 0 13953 1
+8 0 16264 1
+8 0 2494 1
+8 0 10636 1
+8 0 10894 1
+8 0 6844 1
+8 0 674 1
+8 0 13520 1
+8 0 344 1
+8 0 2896 1
+8 0 11871 1
+8 0 1862 1
+8 0 16728 1
+8 0 10308 1
+8 0 2227 1
+8 0 13167 1
+8 0 20607 1
+8 0 9670 1
+8 0 1566 1
+8 0 17885 1
# ---- m==1: precision_at_n and avg_precision are hurt, by unmarking relevant docs
-9 0 doc1990 0
-9 0 doc9342 1
-9 0 doc19427 1
-9 0 doc12432 0
-9 0 doc13480 1
-9 0 doc3322 1
-9 0 doc16044 1
-9 0 doc266 0
-9 0 doc3437 1
-9 0 doc5370 1
-9 0 doc10314 1
-9 0 doc4892 1
-9 0 doc5763 0
-9 0 doc14045 1
-9 0 doc1090 1
-9 0 doc7437 1
-9 0 doc5822 1
-9 0 doc4285 1
-9 0 doc17119 1
-9 0 doc21001 1
-9 0 doc4337 1
-9 0 doc5967 1
-9 0 doc10214 1
-9 0 doc12001 1
-9 0 doc18553 1
-9 0 doc12116 1
-9 0 doc5064 1
-9 0 doc5018 1
-9 0 doc5037 1
-9 0 doc8025 1
+9 0 1990 0
+9 0 9342 1
+9 0 19427 1
+9 0 12432 0
+9 0 13480 1
+9 0 3322 1
+9 0 16044 1
+9 0 266 0
+9 0 3437 1
+9 0 5370 1
+9 0 10314 1
+9 0 4892 1
+9 0 5763 0
+9 0 14045 1
+9 0 1090 1
+9 0 7437 1
+9 0 5822 1
+9 0 4285 1
+9 0 17119 1
+9 0 21001 1
+9 0 4337 1
+9 0 5967 1
+9 0 10214 1
+9 0 12001 1
+9 0 18553 1
+9 0 12116 1
+9 0 5064 1
+9 0 5018 1
+9 0 5037 1
+9 0 8025 1
# ---- m==2: all precision, precision_at_n and recall are hurt.
@@ -383,200 +383,200 @@
10 0 fakedoc3 1
10 0 fakedoc4 1
-10 0 doc17218 0
-10 0 doc10270 0
-10 0 doc5958 0
-10 0 doc19943 0
-10 0 doc6510 1
-10 0 doc16087 1
-10 0 doc14893 1
-10 0 doc8933 1
-10 0 doc4354 1
-10 0 doc16729 1
-10 0 doc16761 1
-10 0 doc6964 1
-10 0 doc16743 1
-10 0 doc7357 1
-10 0 doc2534 1
-10 0 doc18321 1
-10 0 doc18497 1
-10 0 doc11214 1
-10 0 doc11819 1
-10 0 doc10818 1
-10 0 doc15769 1
-10 0 doc5348 1
-10 0 doc14948 1
-10 0 doc7891 1
-10 0 doc9897 1
-10 0 doc15559 1
-10 0 doc14935 1
-10 0 doc14954 1
-10 0 doc6621 1
-10 0 doc6930 1
-
-
-11 0 doc11943 1
-11 0 doc286 1
-11 0 doc1574 1
-11 0 doc17916 1
-11 0 doc17918 1
-11 0 doc19213 1
-11 0 doc9337 1
-11 0 doc8593 1
-11 0 doc8800 1
-11 0 doc18580 1
-11 0 doc209 1
-11 0 doc1893 1
-11 0 doc11189 1
-11 0 doc17702 1
-11 0 doc10180 1
-11 0 doc11869 1
-11 0 doc9705 1
-11 0 doc8715 1
-11 0 doc12753 1
-11 0 doc10195 1
-11 0 doc3552 1
-11 0 doc16030 1
-11 0 doc4623 1
-11 0 doc3188 1
-11 0 doc8735 1
-11 0 doc151 1
-11 0 doc5792 1
-11 0 doc5194 1
-11 0 doc3393 1
-11 0 doc19027 1
-
-
-
-12 0 doc18198 1
-12 0 doc2444 1
-12 0 doc4305 1
-12 0 doc6544 1
-12 0 doc11639 1
-12 0 doc10640 1
-12 0 doc12192 1
-12 0 doc128 1
-12 0 doc10760 1
-12 0 doc10881 1
-12 0 doc2698 1
-12 0 doc3552 1
-12 0 doc20524 1
-12 0 doc1884 1
-12 0 doc9187 1
-12 0 doc3131 1
-12 0 doc2911 1
-12 0 doc2589 1
-12 0 doc3747 1
-12 0 doc3813 1
-12 0 doc5222 1
-12 0 doc6023 1
-12 0 doc6624 1
-12 0 doc7655 1
-12 0 doc9205 1
-12 0 doc12062 1
-12 0 doc15504 1
-12 0 doc13625 1
-12 0 doc18704 1
-12 0 doc2277 1
-
-
-
-13 0 doc4948 1
-13 0 doc21565 1
-13 0 doc17135 1
-13 0 doc1866 1
-13 0 doc13989 1
-13 0 doc5605 1
-13 0 doc13431 1
-13 0 doc2100 1
-13 0 doc16347 1
-13 0 doc16894 1
-13 0 doc6764 1
-13 0 doc8554 1
-13 0 doc8695 1
-13 0 doc8977 1
-13 0 doc19478 1
-13 0 doc14595 1
-13 0 doc2408 1
-13 0 doc2592 1
-13 0 doc10947 1
-13 0 doc15794 1
-13 0 doc5236 1
-13 0 doc14847 1
-13 0 doc3980 1
-13 0 doc1844 1
-13 0 doc42 1
-13 0 doc7783 1
-13 0 doc4557 1
-13 0 doc16423 1
-13 0 doc17170 1
-13 0 doc5822 1
-
-
-
-14 0 doc17172 1
-14 0 doc17210 1
-14 0 doc5044 1
-14 0 doc4627 1
-14 0 doc4683 1
-14 0 doc15126 1
-14 0 doc4538 1
-14 0 doc273 1
-14 0 doc19585 1
-14 0 doc16078 1
-14 0 doc4529 1
-14 0 doc4186 1
-14 0 doc12961 1
-14 0 doc19217 1
-14 0 doc5670 1
-14 0 doc1699 1
-14 0 doc4716 1
-14 0 doc12644 1
-14 0 doc18387 1
-14 0 doc336 1
-14 0 doc16130 1
-14 0 doc18718 1
-14 0 doc12527 1
-14 0 doc11797 1
-14 0 doc11831 1
-14 0 doc7538 1
-14 0 doc17259 1
-14 0 doc18724 1
-14 0 doc19330 1
-14 0 doc19206 1
-
-
-
-15 0 doc12198 1
-15 0 doc20371 1
-15 0 doc2947 1
-15 0 doc10750 1
-15 0 doc7239 1
-15 0 doc14189 1
-15 0 doc19474 1
-15 0 doc14776 1
-15 0 doc21270 1
-15 0 doc6387 1
-15 0 doc12908 1
-15 0 doc9573 1
-15 0 doc17102 1
-15 0 doc21482 1
-15 0 doc6524 1
-15 0 doc18034 1
-15 0 doc1358 1
-15 0 doc13147 1
-15 0 doc17731 1
-15 0 doc12890 1
-15 0 doc20887 1
-15 0 doc19508 1
-15 0 doc18498 1
-15 0 doc20642 1
-15 0 doc19878 1
-15 0 doc6556 1
-15 0 doc10272 1
-15 0 doc5720 1
-15 0 doc17578 1
-15 0 doc17164 1
+10 0 17218 0
+10 0 10270 0
+10 0 5958 0
+10 0 19943 0
+10 0 6510 1
+10 0 16087 1
+10 0 14893 1
+10 0 8933 1
+10 0 4354 1
+10 0 16729 1
+10 0 16761 1
+10 0 6964 1
+10 0 16743 1
+10 0 7357 1
+10 0 2534 1
+10 0 18321 1
+10 0 18497 1
+10 0 11214 1
+10 0 11819 1
+10 0 10818 1
+10 0 15769 1
+10 0 5348 1
+10 0 14948 1
+10 0 7891 1
+10 0 9897 1
+10 0 15559 1
+10 0 14935 1
+10 0 14954 1
+10 0 6621 1
+10 0 6930 1
+
+
+11 0 11943 1
+11 0 286 1
+11 0 1574 1
+11 0 17916 1
+11 0 17918 1
+11 0 19213 1
+11 0 9337 1
+11 0 8593 1
+11 0 8800 1
+11 0 18580 1
+11 0 209 1
+11 0 1893 1
+11 0 11189 1
+11 0 17702 1
+11 0 10180 1
+11 0 11869 1
+11 0 9705 1
+11 0 8715 1
+11 0 12753 1
+11 0 10195 1
+11 0 3552 1
+11 0 16030 1
+11 0 4623 1
+11 0 3188 1
+11 0 8735 1
+11 0 151 1
+11 0 5792 1
+11 0 5194 1
+11 0 3393 1
+11 0 19027 1
+
+
+
+12 0 18198 1
+12 0 2444 1
+12 0 4305 1
+12 0 6544 1
+12 0 11639 1
+12 0 10640 1
+12 0 12192 1
+12 0 128 1
+12 0 10760 1
+12 0 10881 1
+12 0 2698 1
+12 0 3552 1
+12 0 20524 1
+12 0 1884 1
+12 0 9187 1
+12 0 3131 1
+12 0 2911 1
+12 0 2589 1
+12 0 3747 1
+12 0 3813 1
+12 0 5222 1
+12 0 6023 1
+12 0 6624 1
+12 0 7655 1
+12 0 9205 1
+12 0 12062 1
+12 0 15504 1
+12 0 13625 1
+12 0 18704 1
+12 0 2277 1
+
+
+
+13 0 4948 1
+13 0 21565 1
+13 0 17135 1
+13 0 1866 1
+13 0 13989 1
+13 0 5605 1
+13 0 13431 1
+13 0 2100 1
+13 0 16347 1
+13 0 16894 1
+13 0 6764 1
+13 0 8554 1
+13 0 8695 1
+13 0 8977 1
+13 0 19478 1
+13 0 14595 1
+13 0 2408 1
+13 0 2592 1
+13 0 10947 1
+13 0 15794 1
+13 0 5236 1
+13 0 14847 1
+13 0 3980 1
+13 0 1844 1
+13 0 42 1
+13 0 7783 1
+13 0 4557 1
+13 0 16423 1
+13 0 17170 1
+13 0 5822 1
+
+
+
+14 0 17172 1
+14 0 17210 1
+14 0 5044 1
+14 0 4627 1
+14 0 4683 1
+14 0 15126 1
+14 0 4538 1
+14 0 273 1
+14 0 19585 1
+14 0 16078 1
+14 0 4529 1
+14 0 4186 1
+14 0 12961 1
+14 0 19217 1
+14 0 5670 1
+14 0 1699 1
+14 0 4716 1
+14 0 12644 1
+14 0 18387 1
+14 0 336 1
+14 0 16130 1
+14 0 18718 1
+14 0 12527 1
+14 0 11797 1
+14 0 11831 1
+14 0 7538 1
+14 0 17259 1
+14 0 18724 1
+14 0 19330 1
+14 0 19206 1
+
+
+
+15 0 12198 1
+15 0 20371 1
+15 0 2947 1
+15 0 10750 1
+15 0 7239 1
+15 0 14189 1
+15 0 19474 1
+15 0 14776 1
+15 0 21270 1
+15 0 6387 1
+15 0 12908 1
+15 0 9573 1
+15 0 17102 1
+15 0 21482 1
+15 0 6524 1
+15 0 18034 1
+15 0 1358 1
+15 0 13147 1
+15 0 17731 1
+15 0 12890 1
+15 0 20887 1
+15 0 19508 1
+15 0 18498 1
+15 0 20642 1
+15 0 19878 1
+15 0 6556 1
+15 0 10272 1
+15 0 5720 1
+15 0 17578 1
+15 0 17164 1
# --- m==0: avg_precision and recall are hurt, by marking fake docs as relevant
@@ -586,70 +586,70 @@
16 0 fakedoc3 1
16 0 fakedoc4 1
-16 0 doc4043 1
-16 0 doc14985 1
-16 0 doc15370 1
-16 0 doc15426 1
-16 0 doc1702 1
-16 0 doc3062 1
-16 0 doc16134 1
-16 0 doc15037 1
-16 0 doc8224 1
-16 0 doc5044 1
-16 0 doc8545 1
-16 0 doc7228 1
-16 0 doc12686 1
-16 0 doc16609 1
-16 0 doc13161 1
-16 0 doc3446 1
-16 0 doc16493 1
-16 0 doc19297 1
-16 0 doc13619 1
-16 0 doc3281 1
-16 0 doc15499 1
-16 0 doc7373 1
-16 0 doc9064 1
-16 0 doc1710 1
-16 0 doc15411 1
-16 0 doc10890 1
-16 0 doc3166 1
-16 0 doc17894 1
-16 0 doc4560 1
-16 0 doc12766 1
+16 0 4043 1
+16 0 14985 1
+16 0 15370 1
+16 0 15426 1
+16 0 1702 1
+16 0 3062 1
+16 0 16134 1
+16 0 15037 1
+16 0 8224 1
+16 0 5044 1
+16 0 8545 1
+16 0 7228 1
+16 0 12686 1
+16 0 16609 1
+16 0 13161 1
+16 0 3446 1
+16 0 16493 1
+16 0 19297 1
+16 0 13619 1
+16 0 3281 1
+16 0 15499 1
+16 0 7373 1
+16 0 9064 1
+16 0 1710 1
+16 0 15411 1
+16 0 10890 1
+16 0 3166 1
+16 0 17894 1
+16 0 4560 1
+16 0 12766 1
# --- m==1: precision_at_n and avg_precision are hurt, by unmarking relevant docs
-17 0 doc3117 0
-17 0 doc7477 0
-17 0 doc7569 0
-17 0 doc20667 0
-17 0 doc20260 1
-17 0 doc17355 1
-17 0 doc11021 1
-17 0 doc20934 1
-17 0 doc552 1
-17 0 doc20856 1
-17 0 doc3524 1
-17 0 doc17343 1
-17 0 doc21055 1
-17 0 doc19032 1
-17 0 doc19786 1
-17 0 doc9281 1
-17 0 doc1695 1
-17 0 doc15940 1
-17 0 doc9215 1
-17 0 doc8335 1
-17 0 doc20936 1
-17 0 doc6914 1
-17 0 doc12122 1
-17 0 doc6618 1
-17 0 doc5049 1
-17 0 doc450 1
-17 0 doc19206 1
-17 0 doc18823 1
-17 0 doc5307 1
-17 0 doc17295 1
+17 0 3117 0
+17 0 7477 0
+17 0 7569 0
+17 0 20667 0
+17 0 20260 1
+17 0 17355 1
+17 0 11021 1
+17 0 20934 1
+17 0 552 1
+17 0 20856 1
+17 0 3524 1
+17 0 17343 1
+17 0 21055 1
+17 0 19032 1
+17 0 19786 1
+17 0 9281 1
+17 0 1695 1
+17 0 15940 1
+17 0 9215 1
+17 0 8335 1
+17 0 20936 1
+17 0 6914 1
+17 0 12122 1
+17 0 6618 1
+17 0 5049 1
+17 0 450 1
+17 0 19206 1
+17 0 18823 1
+17 0 5307 1
+17 0 17295 1
# ---- m==2: all precision, precision_at_n and recall are hurt.
@@ -659,65 +659,65 @@
18 0 fakedoc3 1
18 0 fakedoc4 1
-18 0 doc8064 0
-18 0 doc18142 0
-18 0 doc19383 0
-18 0 doc21151 0
-18 0 doc4665 1
-18 0 doc2897 1
-18 0 doc6878 1
-18 0 doc14507 1
-18 0 doc2976 1
-18 0 doc11757 1
-18 0 doc12625 1
-18 0 doc14908 1
-18 0 doc12790 1
-18 0 doc17915 1
-18 0 doc11804 1
-18 0 doc12935 1
-18 0 doc8225 1
-18 0 doc18011 1
-18 0 doc10493 1
-18 0 doc17922 1
-18 0 doc1902 1
-18 0 doc14049 1
-18 0 doc1334 1
-18 0 doc1168 1
-18 0 doc4859 1
-18 0 doc7124 1
-18 0 doc9692 1
-18 0 doc18402 1
-18 0 doc9089 1
-18 0 doc15375 1
-
-
-19 0 doc5267 1
-19 0 doc2310 1
-19 0 doc11435 1
-19 0 doc15666 1
-19 0 doc12733 1
-19 0 doc7925 1
-19 0 doc2444 1
-19 0 doc4900 1
-19 0 doc10803 1
-19 0 doc8869 1
-19 0 doc5051 1
-19 0 doc9163 1
-19 0 doc529 1
-19 0 doc19546 1
-19 0 doc18561 1
-19 0 doc10634 1
-19 0 doc3979 1
-19 0 doc8833 1
-19 0 doc7652 1
-19 0 doc4804 1
-19 0 doc12616 1
-19 0 doc8419 1
-19 0 doc9431 1
-19 0 doc16235 1
-19 0 doc732 1
-19 0 doc2515 1
-19 0 doc7194 1
-19 0 doc16301 1
-19 0 doc4494 1
-19 0 doc4496 1
+18 0 8064 0
+18 0 18142 0
+18 0 19383 0
+18 0 21151 0
+18 0 4665 1
+18 0 2897 1
+18 0 6878 1
+18 0 14507 1
+18 0 2976 1
+18 0 11757 1
+18 0 12625 1
+18 0 14908 1
+18 0 12790 1
+18 0 17915 1
+18 0 11804 1
+18 0 12935 1
+18 0 8225 1
+18 0 18011 1
+18 0 10493 1
+18 0 17922 1
+18 0 1902 1
+18 0 14049 1
+18 0 1334 1
+18 0 1168 1
+18 0 4859 1
+18 0 7124 1
+18 0 9692 1
+18 0 18402 1
+18 0 9089 1
+18 0 15375 1
+
+
+19 0 5267 1
+19 0 2310 1
+19 0 11435 1
+19 0 15666 1
+19 0 12733 1
+19 0 7925 1
+19 0 2444 1
+19 0 4900 1
+19 0 10803 1
+19 0 8869 1
+19 0 5051 1
+19 0 9163 1
+19 0 529 1
+19 0 19546 1
+19 0 18561 1
+19 0 10634 1
+19 0 3979 1
+19 0 8833 1
+19 0 7652 1
+19 0 4804 1
+19 0 12616 1
+19 0 8419 1
+19 0 9431 1
+19 0 16235 1
+19 0 732 1
+19 0 2515 1
+19 0 7194 1
+19 0 16301 1
+19 0 4494 1
+19 0 4496 1