You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/12/20 23:49:52 UTC
svn commit: r1051309 - in /lucene/dev/branches/bulkpostings: ./ lucene/
lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/
lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/
lucene/contrib/benchmark/src...
Author: rmuir
Date: Mon Dec 20 22:49:51 2010
New Revision: 1051309
URL: http://svn.apache.org/viewvc?rev=1051309&view=rev
Log:
merge trunk (1050789:1051305)
Modified:
lucene/dev/branches/bulkpostings/ (props changed)
lucene/dev/branches/bulkpostings/lucene/ (props changed)
lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java
lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java
lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java
lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MergePolicy.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/NumericUtils.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/MockDirectoryWrapper.java
lucene/dev/branches/bulkpostings/solr/ (props changed)
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java Mon Dec 20 22:49:51 2010
@@ -29,6 +29,7 @@ public class DocData {
private String body;
private String title;
private String date;
+ private int id;
private Properties props;
public void clear() {
@@ -37,6 +38,7 @@ public class DocData {
title = null;
date = null;
props = null;
+ id = -1;
}
public String getBody() {
@@ -57,6 +59,10 @@ public class DocData {
return name;
}
+ public int getID() {
+ return id;
+ }
+
public Properties getProps() {
return props;
}
@@ -85,6 +91,10 @@ public class DocData {
this.name = name;
}
+ public void setID(int id) {
+ this.id = id;
+ }
+
public void setProps(Properties props) {
this.props = props;
}
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java Mon Dec 20 22:49:51 2010
@@ -20,14 +20,21 @@ package org.apache.lucene.benchmark.byTa
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
+import java.util.Calendar;
import java.util.Map;
import java.util.Properties;
+import java.util.Locale;
import java.util.Random;
+import java.util.Date;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.text.SimpleDateFormat;
+import java.text.ParsePosition;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.Format;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
@@ -82,6 +89,7 @@ public class DocMaker {
static class DocState {
private final Map<String,Field> fields;
+ private final Map<String,NumericField> numericFields;
private final boolean reuseFields;
final Document doc;
DocData docData = new DocData();
@@ -92,6 +100,7 @@ public class DocMaker {
if (reuseFields) {
fields = new HashMap<String,Field>();
+ numericFields = new HashMap<String,NumericField>();
// Initialize the map with the default fields.
fields.put(BODY_FIELD, new Field(BODY_FIELD, "", bodyStore, bodyIndex, termVector));
@@ -99,9 +108,13 @@ public class DocMaker {
fields.put(DATE_FIELD, new Field(DATE_FIELD, "", store, index, termVector));
fields.put(ID_FIELD, new Field(ID_FIELD, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
fields.put(NAME_FIELD, new Field(NAME_FIELD, "", store, index, termVector));
+
+ numericFields.put(DATE_MSEC_FIELD, new NumericField(DATE_MSEC_FIELD));
+ numericFields.put(TIME_SEC_FIELD, new NumericField(TIME_SEC_FIELD));
doc = new Document();
} else {
+ numericFields = null;
fields = null;
doc = null;
}
@@ -124,18 +137,42 @@ public class DocMaker {
}
return f;
}
+
+ NumericField getNumericField(String name) {
+ if (!reuseFields) {
+ return new NumericField(name);
+ }
+
+ NumericField f = numericFields.get(name);
+ if (f == null) {
+ f = new NumericField(name);
+ numericFields.put(name, f);
+ }
+ return f;
+ }
}
- private int numDocsCreated = 0;
private boolean storeBytes = false;
+ private static class DateUtil {
+ public SimpleDateFormat parser = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.US);
+ public Calendar cal = Calendar.getInstance();
+ public ParsePosition pos = new ParsePosition(0);
+ public DateUtil() {
+ parser.setLenient(true);
+ }
+ }
+
// leftovers are thread local, because it is unsafe to share residues between threads
private ThreadLocal<LeftOver> leftovr = new ThreadLocal<LeftOver>();
private ThreadLocal<DocState> docState = new ThreadLocal<DocState>();
+ private ThreadLocal<DateUtil> dateParsers = new ThreadLocal<DateUtil>();
public static final String BODY_FIELD = "body";
public static final String TITLE_FIELD = "doctitle";
public static final String DATE_FIELD = "docdate";
+ public static final String DATE_MSEC_FIELD = "docdatenum";
+ public static final String TIME_SEC_FIELD = "doctimesecnum";
public static final String ID_FIELD = "docid";
public static final String BYTES_FIELD = "bytes";
public static final String NAME_FIELD = "docname";
@@ -155,6 +192,7 @@ public class DocMaker {
private int lastPrintedNumUniqueTexts = 0;
private long lastPrintedNumUniqueBytes = 0;
+ private final AtomicInteger numDocsCreated = new AtomicInteger();
private int printNum = 0;
@@ -169,7 +207,16 @@ public class DocMaker {
// Set ID_FIELD
Field idField = ds.getField(ID_FIELD, storeVal, Index.NOT_ANALYZED_NO_NORMS, termVecVal);
- idField.setValue("doc" + (r != null ? r.nextInt(updateDocIDLimit) : incrNumDocsCreated()));
+ int id;
+ if (r != null) {
+ id = r.nextInt(updateDocIDLimit);
+ } else {
+ id = docData.getID();
+ if (id == -1) {
+ id = numDocsCreated.getAndIncrement();
+ }
+ }
+ idField.setValue(Integer.toString(id));
doc.add(idField);
// Set NAME_FIELD
@@ -181,13 +228,39 @@ public class DocMaker {
doc.add(nameField);
// Set DATE_FIELD
- String date = docData.getDate();
+ DateUtil util = dateParsers.get();
+ if (util == null) {
+ util = new DateUtil();
+ dateParsers.set(util);
+ }
+ Date date = null;
+ String dateString = docData.getDate();
+ if (dateString != null) {
+ util.pos.setIndex(0);
+ date = util.parser.parse(dateString, util.pos);
+ //System.out.println(dateString + " parsed to " + date);
+ } else {
+ dateString = "";
+ }
+ Field dateStringField = ds.getField(DATE_FIELD, storeVal, indexVal, termVecVal);
+ dateStringField.setValue(dateString);
+ doc.add(dateStringField);
+
if (date == null) {
- date = "";
+ // just set to right now
+ date = new Date();
}
- Field dateField = ds.getField(DATE_FIELD, storeVal, indexVal, termVecVal);
- dateField.setValue(date);
+
+ NumericField dateField = ds.getNumericField(DATE_MSEC_FIELD);
+ dateField.setLongValue(date.getTime());
doc.add(dateField);
+
+ util.cal.setTime(date);
+ final int sec = util.cal.get(Calendar.HOUR_OF_DAY)*3600 + util.cal.get(Calendar.MINUTE)*60 + util.cal.get(Calendar.SECOND);
+
+ NumericField timeSecField = ds.getNumericField(TIME_SEC_FIELD);
+ timeSecField.setIntValue(sec);
+ doc.add(timeSecField);
// Set TITLE_FIELD
String title = docData.getTitle();
@@ -252,10 +325,6 @@ public class DocMaker {
return ds;
}
- protected synchronized int incrNumDocsCreated() {
- return numDocsCreated++;
- }
-
/**
* Closes the {@link DocMaker}. The base implementation closes the
* {@link ContentSource}, and it can be overridden to do more work (but make
@@ -363,7 +432,7 @@ public class DocMaker {
// re-initiate since properties by round may have changed.
setConfig(config);
source.resetInputs();
- numDocsCreated = 0;
+ numDocsCreated.set(0);
resetLeftovers();
}
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java Mon Dec 20 22:49:51 2010
@@ -48,6 +48,7 @@ public class LineDocSource extends Conte
private File file;
private BufferedReader reader;
+ private int readCount;
private synchronized void openFile() {
try {
@@ -71,9 +72,12 @@ public class LineDocSource extends Conte
@Override
public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
- String line;
+ final String line;
+ final int myID;
+
synchronized(this) {
line = reader.readLine();
+ myID = readCount++;
if (line == null) {
if (!forever) {
throw new NoMoreDataException();
@@ -96,6 +100,7 @@ public class LineDocSource extends Conte
}
// The date String was written in the format of DateTools.dateToString.
docData.clear();
+ docData.setID(myID);
docData.setBody(line.substring(1 + spot2, line.length()));
docData.setTitle(line.substring(0, spot));
docData.setDate(line.substring(1 + spot, spot2));
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java Mon Dec 20 22:49:51 2010
@@ -26,6 +26,7 @@ import org.apache.lucene.benchmark.byTas
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.NumericField;
/**
* Simple task to test performance of tokenizers. It just
@@ -67,7 +68,7 @@ public class ReadTokensTask extends Perf
Analyzer analyzer = getRunData().getAnalyzer();
int tokenCount = 0;
for(final Fieldable field : fields) {
- if (!field.isTokenized()) continue;
+ if (!field.isTokenized() || field instanceof NumericField) continue;
final TokenStream stream;
final TokenStream streamValue = field.tokenStreamValue();
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Mon Dec 20 22:49:51 2010
@@ -475,8 +475,9 @@ public class TestPerfTasksLogic extends
FieldsEnum fields = MultiFields.getFields(reader).iterator();
String fieldName = null;
while((fieldName = fields.next()) != null) {
- if (fieldName == DocMaker.ID_FIELD)
+ if (fieldName == DocMaker.ID_FIELD || fieldName == DocMaker.DATE_MSEC_FIELD || fieldName == DocMaker.TIME_SEC_FIELD) {
continue;
+ }
TermsEnum terms = fields.terms();
DocsEnum docs = null;
while(terms.next() != null) {
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java Mon Dec 20 22:49:51 2010
@@ -56,7 +56,7 @@ public class BalancedSegmentMergePolicy
@Override
protected long size(SegmentInfo info) throws IOException {
- long byteSize = info.sizeInBytes();
+ long byteSize = info.sizeInBytes(true);
float delRatio = (info.docCount <= 0 ? 0.0f : ((float)info.getDelCount() / (float)info.docCount));
return (info.docCount <= 0 ? byteSize : (long)((1.0f - delRatio) * byteSize));
}
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java Mon Dec 20 22:49:51 2010
@@ -26,7 +26,6 @@ import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
-import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.store.FSDirectory;
@@ -108,7 +107,7 @@ public class IndexSplitter {
DecimalFormat formatter = new DecimalFormat("###,###.###");
for (int x = 0; x < infos.size(); x++) {
SegmentInfo info = infos.info(x);
- String sizeStr = formatter.format(info.sizeInBytes());
+ String sizeStr = formatter.format(info.sizeInBytes(true));
System.out.println(info.name + " " + sizeStr);
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java Mon Dec 20 22:49:51 2010
@@ -417,8 +417,8 @@ public class CheckIndex {
segInfoStat.hasProx = info.getHasProx();
msg(" numFiles=" + info.files().size());
segInfoStat.numFiles = info.files().size();
- msg(" size (MB)=" + nf.format(info.sizeInBytes()/(1024.*1024.)));
- segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.);
+ segInfoStat.sizeMB = info.sizeInBytes(true)/(1024.*1024.);
+ msg(" size (MB)=" + nf.format(segInfoStat.sizeMB));
Map<String,String> diagnostics = info.getDiagnostics();
segInfoStat.diagnostics = diagnostics;
if (diagnostics.size() > 0) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java Mon Dec 20 22:49:51 2010
@@ -65,7 +65,6 @@ public class ConcurrentMergeScheduler ex
protected Directory dir;
- private boolean closed;
protected IndexWriter writer;
protected int mergeThreadCount;
@@ -147,18 +146,37 @@ public class ConcurrentMergeScheduler ex
* pause & unpause threads. */
protected synchronized void updateMergeThreads() {
- CollectionUtil.mergeSort(mergeThreads, compareByMergeDocCount);
+ // Only look at threads that are alive & not in the
+ // process of stopping (ie have an active merge):
+ final List<MergeThread> activeMerges = new ArrayList<MergeThread>();
+
+ int threadIdx = 0;
+ while (threadIdx < mergeThreads.size()) {
+ final MergeThread mergeThread = mergeThreads.get(threadIdx);
+ if (!mergeThread.isAlive()) {
+ // Prune any dead threads
+ mergeThreads.remove(threadIdx);
+ continue;
+ }
+ if (mergeThread.getCurrentMerge() != null) {
+ activeMerges.add(mergeThread);
+ }
+ threadIdx++;
+ }
+
+ CollectionUtil.mergeSort(activeMerges, compareByMergeDocCount);
- final int count = mergeThreads.size();
int pri = mergeThreadPriority;
- for(int i=0;i<count;i++) {
- final MergeThread mergeThread = mergeThreads.get(i);
+ final int activeMergeCount = activeMerges.size();
+ for (threadIdx=0;threadIdx<activeMergeCount;threadIdx++) {
+ final MergeThread mergeThread = activeMerges.get(threadIdx);
final MergePolicy.OneMerge merge = mergeThread.getCurrentMerge();
- if (merge == null) {
+ if (merge == null) {
continue;
}
+
final boolean doPause;
- if (i < count-maxThreadCount) {
+ if (threadIdx < activeMergeCount-maxThreadCount) {
doPause = true;
} else {
doPause = false;
@@ -208,23 +226,29 @@ public class ConcurrentMergeScheduler ex
@Override
public void close() {
- closed = true;
+ sync();
}
- public synchronized void sync() {
- while(mergeThreadCount() > 0) {
- if (verbose())
- message("now wait for threads; currently " + mergeThreads.size() + " still running");
- final int count = mergeThreads.size();
- if (verbose()) {
- for(int i=0;i<count;i++)
- message(" " + i + ": " + mergeThreads.get(i));
+ /** Wait for any running merge threads to finish */
+ public void sync() {
+ while(true) {
+ MergeThread toSync = null;
+ synchronized(this) {
+ for(MergeThread t : mergeThreads) {
+ if (t.isAlive()) {
+ toSync = t;
+ break;
+ }
+ }
}
-
- try {
- wait();
- } catch (InterruptedException ie) {
- throw new ThreadInterruptedException(ie);
+ if (toSync != null) {
+ try {
+ toSync.join();
+ } catch (InterruptedException ie) {
+ throw new ThreadInterruptedException(ie);
+ }
+ } else {
+ break;
}
}
}
@@ -232,9 +256,12 @@ public class ConcurrentMergeScheduler ex
private synchronized int mergeThreadCount() {
int count = 0;
final int numThreads = mergeThreads.size();
- for(int i=0;i<numThreads;i++)
- if (mergeThreads.get(i).isAlive())
+ for(int i=0;i<numThreads;i++) {
+ final MergeThread t = mergeThreads.get(i);
+ if (t.isAlive() && t.getCurrentMerge() != null) {
count++;
+ }
+ }
return count;
}
@@ -311,11 +338,17 @@ public class ConcurrentMergeScheduler ex
// merge:
merger = getMergeThread(writer, merge);
mergeThreads.add(merger);
- updateMergeThreads();
- if (verbose())
+ if (verbose()) {
message(" launch new thread [" + merger.getName() + "]");
+ }
merger.start();
+
+ // Must call this after starting the thread else
+ // the new thread is removed from mergeThreads
+ // (since it's not alive yet):
+ updateMergeThreads();
+
success = true;
}
} finally {
@@ -408,8 +441,6 @@ public class ConcurrentMergeScheduler ex
if (verbose())
message(" merge thread: do another merge " + merge.segString(dir));
} else {
- done = true;
- updateMergeThreads();
break;
}
}
@@ -428,11 +459,10 @@ public class ConcurrentMergeScheduler ex
}
}
} finally {
+ done = true;
synchronized(ConcurrentMergeScheduler.this) {
- ConcurrentMergeScheduler.this.notifyAll();
- boolean removed = mergeThreads.remove(this);
- assert removed;
updateMergeThreads();
+ ConcurrentMergeScheduler.this.notifyAll();
}
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Mon Dec 20 22:49:51 2010
@@ -592,7 +592,7 @@ final class DocumentsWriter {
threads.add(threadState.consumer);
}
- long startNumBytesUsed = bytesUsed();
+ double startMBUsed = bytesUsed()/1024./1024.;
consumer.flush(threads, flushState);
newSegment.setHasVectors(flushState.hasVectors);
@@ -622,11 +622,13 @@ final class DocumentsWriter {
if (infoStream != null) {
message("flush: segment=" + newSegment);
- final long newSegmentSize = newSegment.sizeInBytes();
- message(" ramUsed=" + nf.format(startNumBytesUsed / 1024. / 1024.) + " MB" +
- " newFlushedSize=" + nf.format(newSegmentSize / 1024 / 1024) + " MB" +
- " docs/MB=" + nf.format(numDocs / (newSegmentSize / 1024. / 1024.)) +
- " new/old=" + nf.format(100.0 * newSegmentSize / startNumBytesUsed) + "%");
+ final double newSegmentSizeNoStore = newSegment.sizeInBytes(false)/1024./1024.;
+ final double newSegmentSize = newSegment.sizeInBytes(true)/1024./1024.;
+ message(" ramUsed=" + nf.format(startMBUsed) + " MB" +
+ " newFlushedSize=" + nf.format(newSegmentSize) + " MB" +
+ " (" + nf.format(newSegmentSizeNoStore) + " MB w/o doc stores)" +
+ " docs/MB=" + nf.format(numDocs / newSegmentSize) +
+ " new/old=" + nf.format(100.0 * newSegmentSizeNoStore / startMBUsed) + "%");
}
success = true;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java Mon Dec 20 22:49:51 2010
@@ -830,7 +830,7 @@ public class IndexWriter implements Clos
private FieldInfos getCurrentFieldInfos() throws IOException {
final FieldInfos fieldInfos;
if (segmentInfos.size() > 0) {
- if (segmentInfos.getFormat() > DefaultSegmentInfosWriter.FORMAT_4_0) {
+ if (segmentInfos.getFormat() > DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
// Pre-4.0 index. In this case we sweep all
// segments, merging their FieldInfos:
fieldInfos = new FieldInfos();
@@ -2923,17 +2923,10 @@ public class IndexWriter implements Clos
if (merge.isAborted())
return;
- boolean hasVectors = false;
- for (SegmentInfo sourceSegment : merge.segments) {
- if (sourceSegment.getHasVectors()) {
- hasVectors = true;
- }
- }
-
// Bind a new segment name here so even with
// ConcurrentMergePolicy we keep deterministic segment
// names.
- merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, hasVectors);
+ merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false);
Map<String,String> details = new HashMap<String,String>();
details.put("optimize", Boolean.toString(merge.optimize));
@@ -3070,13 +3063,15 @@ public class IndexWriter implements Clos
SegmentInfos sourceSegments = merge.segments;
final int numSegments = sourceSegments.size();
- if (infoStream != null)
- message("merging " + merge.segString(directory));
-
SegmentMerger merger = new SegmentMerger(directory, termIndexInterval, mergedName, merge,
codecs, payloadProcessorProvider,
((FieldInfos) docWriter.getFieldInfos().clone()));
+ if (infoStream != null) {
+ message("merging " + merge.segString(directory) + " mergeVectors=" + merger.fieldInfos().hasVectors());
+ }
+
+ merge.info.setHasVectors(merger.fieldInfos().hasVectors());
merge.readers = new SegmentReader[numSegments];
merge.readersClone = new SegmentReader[numSegments];
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Mon Dec 20 22:49:51 2010
@@ -184,7 +184,7 @@ public abstract class LogMergePolicy ext
}
protected long sizeBytes(SegmentInfo info) throws IOException {
- long byteSize = info.sizeInBytes();
+ long byteSize = info.sizeInBytes(true);
if (calibrateSizeByDeletes) {
int delCount = writer.get().numDeletedDocs(info);
double delRatio = (info.docCount <= 0 ? 0.0f : ((float)delCount / (float)info.docCount));
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MergePolicy.java Mon Dec 20 22:49:51 2010
@@ -165,7 +165,7 @@ public abstract class MergePolicy implem
public long totalBytesSize() throws IOException {
long total = 0;
for (SegmentInfo info : segments) {
- total += info.sizeInBytes();
+ total += info.sizeInBytes(true);
}
return total;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Mon Dec 20 22:49:51 2010
@@ -220,13 +220,16 @@ public final class SegmentInfo {
/** Returns total size in bytes of all of files used by
* this segment. */
- public long sizeInBytes() throws IOException {
+ public long sizeInBytes(boolean includeDocStores) throws IOException {
if (sizeInBytes == -1) {
List<String> files = files();
final int size = files.size();
sizeInBytes = 0;
for(int i=0;i<size;i++) {
final String fileName = files.get(i);
+ if (!includeDocStores && IndexFileNames.isDocStoreFile(fileName)) {
+ continue;
+ }
// We don't count bytes used by a shared doc store
// against this segment:
if (docStoreOffset == -1 || !IndexFileNames.isDocStoreFile(fileName))
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java Mon Dec 20 22:49:51 2010
@@ -35,13 +35,13 @@ public class DefaultSegmentInfosWriter e
* diagnostics storage, and switches userData to Map */
public static final int FORMAT_DIAGNOSTICS = -9;
+ /** Each segment records whether it has term vectors */
+ public static final int FORMAT_HAS_VECTORS = -10;
+
/** Each segment records whether its postings are written
* in the new flex format */
public static final int FORMAT_4_0 = -11;
- /** Each segment records whether it has term vectors */
- public static final int FORMAT_HAS_VECTORS = -10;
-
/** This must always point to the most recent file format.
* whenever you add a new format, make it 1 smaller (negative version logic)! */
public static final int FORMAT_CURRENT = FORMAT_4_0;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java Mon Dec 20 22:49:51 2010
@@ -57,7 +57,7 @@ public abstract class TermsIndexReaderBa
public abstract void getIndexOffset(long ord, TermsIndexResult result) throws IOException;
- /** Call this sequentially for each term encoutered,
+ /** Call this sequentially for each term encountered,
* after calling {@link #getIndexOffset}. */
public abstract boolean isIndexTerm(long ord, int docFreq, boolean onlyLoaded) throws IOException;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/NumericUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/NumericUtils.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/NumericUtils.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/NumericUtils.java Mon Dec 20 22:49:51 2010
@@ -172,7 +172,7 @@ public final class NumericUtils {
public static int getPrefixCodedLongShift(final BytesRef val) {
final int shift = val.bytes[val.offset] - SHIFT_START_LONG;
if (shift > 63 || shift < 0)
- throw new NumberFormatException("Invalid shift value in prefixCoded bytes (is encoded value really an INT?)");
+ throw new NumberFormatException("Invalid shift value (" + shift + ") in prefixCoded bytes (is encoded value really an INT?)");
return shift;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java Mon Dec 20 22:49:51 2010
@@ -184,6 +184,7 @@ public class TestConcurrentMergeSchedule
writer = new IndexWriter(directory, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer())
.setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(2));
+ writer.setInfoStream(VERBOSE ? System.out : null);
}
writer.close();
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java Mon Dec 20 22:49:51 2010
@@ -851,10 +851,14 @@ public class TestIndexWriterExceptions e
w.close();
for(int i=0;i<200;i++) {
+ if (VERBOSE) {
+ System.out.println("TEST: iter " + i);
+ }
MockDirectoryWrapper dir = new MockDirectoryWrapper(random, new RAMDirectory(startDir));
conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergeScheduler(new ConcurrentMergeScheduler());
((ConcurrentMergeScheduler) conf.getMergeScheduler()).setSuppressExceptions();
w = new IndexWriter(dir, conf);
+ w.setInfoStream(VERBOSE ? System.out : null);
dir.setRandomIOExceptionRate(0.5);
try {
w.optimize();
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java Mon Dec 20 22:49:51 2010
@@ -59,7 +59,7 @@ public class TestSizeBoundedOptimize ext
SegmentInfos sis = new SegmentInfos();
sis.read(dir);
- double min = sis.info(0).sizeInBytes();
+ double min = sis.info(0).sizeInBytes(true);
conf = newWriterConfig();
LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java Mon Dec 20 22:49:51 2010
@@ -93,6 +93,9 @@ public class FunctionTestSetup extends L
@Before
public void setUp() throws Exception {
super.setUp();
+ if (VERBOSE) {
+ System.out.println("TEST: setUp");
+ }
// prepare a small index with just a few documents.
dir = newDirectory();
anlzr = new MockAnalyzer();
@@ -101,6 +104,7 @@ public class FunctionTestSetup extends L
iwc.setMaxBufferedDocs(_TestUtil.nextInt(random, 2, 7));
}
RandomIndexWriter iw = new RandomIndexWriter(random, dir, iwc);
+ iw.w.setInfoStream(VERBOSE ? System.out : null);
// add docs not exactly in natural ID order, to verify we do check the order of docs by scores
int remaining = N_DOCS;
boolean done[] = new boolean[N_DOCS];
@@ -115,9 +119,15 @@ public class FunctionTestSetup extends L
remaining --;
}
if (!doMultiSegment) {
+ if (VERBOSE) {
+ System.out.println("TEST: setUp optimize");
+ }
iw.optimize();
}
iw.close();
+ if (VERBOSE) {
+ System.out.println("TEST: setUp done close");
+ }
}
private void addDoc(RandomIndexWriter iw, int i) throws Exception {
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/MockDirectoryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/MockDirectoryWrapper.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/MockDirectoryWrapper.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/MockDirectoryWrapper.java Mon Dec 20 22:49:51 2010
@@ -18,18 +18,20 @@ package org.apache.lucene.store;
*/
import java.io.Closeable;
-import java.io.IOException;
import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
-import java.util.Random;
import java.util.Map;
-import java.util.HashMap;
-import java.util.HashSet;
+import java.util.Random;
import java.util.Set;
-import java.util.ArrayList;
+
+import org.apache.lucene.util.LuceneTestCase;
/**
* This is a Directory Wrapper that adds methods
@@ -220,6 +222,10 @@ public class MockDirectoryWrapper extend
if (randomIOExceptionRate > 0.0) {
int number = Math.abs(randomState.nextInt() % 1000);
if (number < randomIOExceptionRate*1000) {
+ if (LuceneTestCase.VERBOSE) {
+ System.out.println(Thread.currentThread().getName() + ": MockDirectoryWrapper: now throw random exception");
+ new Throwable().printStackTrace(System.out);
+ }
throw new IOException("a random IOException");
}
}