You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/02/09 10:36:03 UTC
svn commit: r1068809 [5/36] - in /lucene/dev/branches/docvalues: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/.idea/copyright/
dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ant/
dev-tools/idea/lucene/contrib/queryparser/ dev-tools/...
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DateTools.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DateTools.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DateTools.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DateTools.java Wed Feb 9 09:35:27 2011
@@ -47,28 +47,37 @@ import org.apache.lucene.util.NumericUti
*/
public class DateTools {
- private final static TimeZone GMT = TimeZone.getTimeZone("GMT");
+ private static final class DateFormats {
+ final static TimeZone GMT = TimeZone.getTimeZone("GMT");
- private static final SimpleDateFormat YEAR_FORMAT = new SimpleDateFormat("yyyy", Locale.US);
- private static final SimpleDateFormat MONTH_FORMAT = new SimpleDateFormat("yyyyMM", Locale.US);
- private static final SimpleDateFormat DAY_FORMAT = new SimpleDateFormat("yyyyMMdd", Locale.US);
- private static final SimpleDateFormat HOUR_FORMAT = new SimpleDateFormat("yyyyMMddHH", Locale.US);
- private static final SimpleDateFormat MINUTE_FORMAT = new SimpleDateFormat("yyyyMMddHHmm", Locale.US);
- private static final SimpleDateFormat SECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US);
- private static final SimpleDateFormat MILLISECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.US);
- static {
- // times need to be normalized so the value doesn't depend on the
- // location the index is created/used:
- YEAR_FORMAT.setTimeZone(GMT);
- MONTH_FORMAT.setTimeZone(GMT);
- DAY_FORMAT.setTimeZone(GMT);
- HOUR_FORMAT.setTimeZone(GMT);
- MINUTE_FORMAT.setTimeZone(GMT);
- SECOND_FORMAT.setTimeZone(GMT);
- MILLISECOND_FORMAT.setTimeZone(GMT);
+ final SimpleDateFormat YEAR_FORMAT = new SimpleDateFormat("yyyy", Locale.US);
+ final SimpleDateFormat MONTH_FORMAT = new SimpleDateFormat("yyyyMM", Locale.US);
+ final SimpleDateFormat DAY_FORMAT = new SimpleDateFormat("yyyyMMdd", Locale.US);
+ final SimpleDateFormat HOUR_FORMAT = new SimpleDateFormat("yyyyMMddHH", Locale.US);
+ final SimpleDateFormat MINUTE_FORMAT = new SimpleDateFormat("yyyyMMddHHmm", Locale.US);
+ final SimpleDateFormat SECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US);
+ final SimpleDateFormat MILLISECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.US);
+ {
+ // times need to be normalized so the value doesn't depend on the
+ // location the index is created/used:
+ YEAR_FORMAT.setTimeZone(GMT);
+ MONTH_FORMAT.setTimeZone(GMT);
+ DAY_FORMAT.setTimeZone(GMT);
+ HOUR_FORMAT.setTimeZone(GMT);
+ MINUTE_FORMAT.setTimeZone(GMT);
+ SECOND_FORMAT.setTimeZone(GMT);
+ MILLISECOND_FORMAT.setTimeZone(GMT);
+ }
+
+ final Calendar calInstance = Calendar.getInstance(GMT, Locale.US);
}
-
- private static final Calendar calInstance = Calendar.getInstance(GMT);
+
+ private static final ThreadLocal<DateFormats> FORMATS = new ThreadLocal<DateFormats>() {
+ @Override
+ protected DateFormats initialValue() {
+ return new DateFormats();
+ }
+ };
// cannot create, the class has static methods only
private DateTools() {}
@@ -82,7 +91,7 @@ public class DateTools {
* @return a string in format <code>yyyyMMddHHmmssSSS</code> or shorter,
* depending on <code>resolution</code>; using GMT as timezone
*/
- public static synchronized String dateToString(Date date, Resolution resolution) {
+ public static String dateToString(Date date, Resolution resolution) {
return timeToString(date.getTime(), resolution);
}
@@ -95,24 +104,20 @@ public class DateTools {
* @return a string in format <code>yyyyMMddHHmmssSSS</code> or shorter,
* depending on <code>resolution</code>; using GMT as timezone
*/
- public static synchronized String timeToString(long time, Resolution resolution) {
- calInstance.setTimeInMillis(round(time, resolution));
- Date date = calInstance.getTime();
-
- if (resolution == Resolution.YEAR) {
- return YEAR_FORMAT.format(date);
- } else if (resolution == Resolution.MONTH) {
- return MONTH_FORMAT.format(date);
- } else if (resolution == Resolution.DAY) {
- return DAY_FORMAT.format(date);
- } else if (resolution == Resolution.HOUR) {
- return HOUR_FORMAT.format(date);
- } else if (resolution == Resolution.MINUTE) {
- return MINUTE_FORMAT.format(date);
- } else if (resolution == Resolution.SECOND) {
- return SECOND_FORMAT.format(date);
- } else if (resolution == Resolution.MILLISECOND) {
- return MILLISECOND_FORMAT.format(date);
+ public static String timeToString(long time, Resolution resolution) {
+ final DateFormats formats = FORMATS.get();
+
+ formats.calInstance.setTimeInMillis(round(time, resolution));
+ final Date date = formats.calInstance.getTime();
+
+ switch (resolution) {
+ case YEAR: return formats.YEAR_FORMAT.format(date);
+ case MONTH:return formats.MONTH_FORMAT.format(date);
+ case DAY: return formats.DAY_FORMAT.format(date);
+ case HOUR: return formats.HOUR_FORMAT.format(date);
+ case MINUTE: return formats.MINUTE_FORMAT.format(date);
+ case SECOND: return formats.SECOND_FORMAT.format(date);
+ case MILLISECOND: return formats.MILLISECOND_FORMAT.format(date);
}
throw new IllegalArgumentException("unknown resolution " + resolution);
@@ -128,7 +133,7 @@ public class DateTools {
* @throws ParseException if <code>dateString</code> is not in the
* expected format
*/
- public static synchronized long stringToTime(String dateString) throws ParseException {
+ public static long stringToTime(String dateString) throws ParseException {
return stringToDate(dateString).getTime();
}
@@ -142,21 +147,23 @@ public class DateTools {
* @throws ParseException if <code>dateString</code> is not in the
* expected format
*/
- public static synchronized Date stringToDate(String dateString) throws ParseException {
+ public static Date stringToDate(String dateString) throws ParseException {
+ final DateFormats formats = FORMATS.get();
+
if (dateString.length() == 4) {
- return YEAR_FORMAT.parse(dateString);
+ return formats.YEAR_FORMAT.parse(dateString);
} else if (dateString.length() == 6) {
- return MONTH_FORMAT.parse(dateString);
+ return formats.MONTH_FORMAT.parse(dateString);
} else if (dateString.length() == 8) {
- return DAY_FORMAT.parse(dateString);
+ return formats.DAY_FORMAT.parse(dateString);
} else if (dateString.length() == 10) {
- return HOUR_FORMAT.parse(dateString);
+ return formats.HOUR_FORMAT.parse(dateString);
} else if (dateString.length() == 12) {
- return MINUTE_FORMAT.parse(dateString);
+ return formats.MINUTE_FORMAT.parse(dateString);
} else if (dateString.length() == 14) {
- return SECOND_FORMAT.parse(dateString);
+ return formats.SECOND_FORMAT.parse(dateString);
} else if (dateString.length() == 17) {
- return MILLISECOND_FORMAT.parse(dateString);
+ return formats.MILLISECOND_FORMAT.parse(dateString);
}
throw new ParseException("Input is not valid date string: " + dateString, 0);
}
@@ -170,7 +177,7 @@ public class DateTools {
* @return the date with all values more precise than <code>resolution</code>
* set to 0 or 1
*/
- public static synchronized Date round(Date date, Resolution resolution) {
+ public static Date round(Date date, Resolution resolution) {
return new Date(round(date.getTime(), resolution));
}
@@ -184,67 +191,63 @@ public class DateTools {
* @return the date with all values more precise than <code>resolution</code>
* set to 0 or 1, expressed as milliseconds since January 1, 1970, 00:00:00 GMT
*/
- public static synchronized long round(long time, Resolution resolution) {
+ public static long round(long time, Resolution resolution) {
+ final Calendar calInstance = FORMATS.get().calInstance;
calInstance.setTimeInMillis(time);
- if (resolution == Resolution.YEAR) {
- calInstance.set(Calendar.MONTH, 0);
- calInstance.set(Calendar.DAY_OF_MONTH, 1);
- calInstance.set(Calendar.HOUR_OF_DAY, 0);
- calInstance.set(Calendar.MINUTE, 0);
- calInstance.set(Calendar.SECOND, 0);
- calInstance.set(Calendar.MILLISECOND, 0);
- } else if (resolution == Resolution.MONTH) {
- calInstance.set(Calendar.DAY_OF_MONTH, 1);
- calInstance.set(Calendar.HOUR_OF_DAY, 0);
- calInstance.set(Calendar.MINUTE, 0);
- calInstance.set(Calendar.SECOND, 0);
- calInstance.set(Calendar.MILLISECOND, 0);
- } else if (resolution == Resolution.DAY) {
- calInstance.set(Calendar.HOUR_OF_DAY, 0);
- calInstance.set(Calendar.MINUTE, 0);
- calInstance.set(Calendar.SECOND, 0);
- calInstance.set(Calendar.MILLISECOND, 0);
- } else if (resolution == Resolution.HOUR) {
- calInstance.set(Calendar.MINUTE, 0);
- calInstance.set(Calendar.SECOND, 0);
- calInstance.set(Calendar.MILLISECOND, 0);
- } else if (resolution == Resolution.MINUTE) {
- calInstance.set(Calendar.SECOND, 0);
- calInstance.set(Calendar.MILLISECOND, 0);
- } else if (resolution == Resolution.SECOND) {
- calInstance.set(Calendar.MILLISECOND, 0);
- } else if (resolution == Resolution.MILLISECOND) {
- // don't cut off anything
- } else {
- throw new IllegalArgumentException("unknown resolution " + resolution);
+ switch (resolution) {
+ case YEAR:
+ calInstance.set(Calendar.MONTH, 0);
+ calInstance.set(Calendar.DAY_OF_MONTH, 1);
+ calInstance.set(Calendar.HOUR_OF_DAY, 0);
+ calInstance.set(Calendar.MINUTE, 0);
+ calInstance.set(Calendar.SECOND, 0);
+ calInstance.set(Calendar.MILLISECOND, 0);
+ break;
+ case MONTH:
+ calInstance.set(Calendar.DAY_OF_MONTH, 1);
+ calInstance.set(Calendar.HOUR_OF_DAY, 0);
+ calInstance.set(Calendar.MINUTE, 0);
+ calInstance.set(Calendar.SECOND, 0);
+ calInstance.set(Calendar.MILLISECOND, 0);
+ break;
+ case DAY:
+ calInstance.set(Calendar.HOUR_OF_DAY, 0);
+ calInstance.set(Calendar.MINUTE, 0);
+ calInstance.set(Calendar.SECOND, 0);
+ calInstance.set(Calendar.MILLISECOND, 0);
+ break;
+ case HOUR:
+ calInstance.set(Calendar.MINUTE, 0);
+ calInstance.set(Calendar.SECOND, 0);
+ calInstance.set(Calendar.MILLISECOND, 0);
+ break;
+ case MINUTE:
+ calInstance.set(Calendar.SECOND, 0);
+ calInstance.set(Calendar.MILLISECOND, 0);
+ break;
+ case SECOND:
+ calInstance.set(Calendar.MILLISECOND, 0);
+ break;
+ case MILLISECOND:
+ // don't cut off anything
+ break;
+ default:
+ throw new IllegalArgumentException("unknown resolution " + resolution);
}
return calInstance.getTimeInMillis();
}
/** Specifies the time granularity. */
- public static class Resolution {
+ public static enum Resolution {
- public static final Resolution YEAR = new Resolution("year");
- public static final Resolution MONTH = new Resolution("month");
- public static final Resolution DAY = new Resolution("day");
- public static final Resolution HOUR = new Resolution("hour");
- public static final Resolution MINUTE = new Resolution("minute");
- public static final Resolution SECOND = new Resolution("second");
- public static final Resolution MILLISECOND = new Resolution("millisecond");
+ YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND;
- private String resolution;
-
- private Resolution() {
- }
-
- private Resolution(String resolution) {
- this.resolution = resolution;
- }
-
+ /** this method returns the name of the resolution
+ * in lowercase (for backwards compatibility) */
@Override
public String toString() {
- return resolution;
+ return super.toString().toLowerCase(Locale.ENGLISH);
}
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/Document.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/Document.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/Document.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/Document.java Wed Feb 9 09:35:27 2011
@@ -18,6 +18,7 @@ package org.apache.lucene.document;
*/
import java.util.*; // for javadoc
+import org.apache.lucene.search.IndexSearcher; // for javadoc
import org.apache.lucene.search.ScoreDoc; // for javadoc
import org.apache.lucene.index.IndexReader; // for javadoc
@@ -165,7 +166,7 @@ public final class Document implements j
/** Returns a List of all the fields in a document.
* <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
* <i>not</i> available in documents retrieved from the
- * index, e.g. {@link Searcher#doc(int)} or {@link
+ * index, e.g. {@link IndexSearcher#doc(int)} or {@link
* IndexReader#document(int)}.
*/
public final List<Fieldable> getFields() {
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/Fieldable.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/Fieldable.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/Fieldable.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/Fieldable.java Wed Feb 9 09:35:27 2011
@@ -67,7 +67,7 @@ public interface Fieldable extends Seria
*
* <p>Note: this value is not stored directly with the document in the index.
* Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
- * {@link org.apache.lucene.search.Searcher#doc(int)} may thus not have the same value present as when
+ * {@link org.apache.lucene.search.IndexSearcher#doc(int)} may thus not have the same value present as when
* this field was indexed.
*
* @see #setBoost(float)
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/NumericField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/NumericField.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/NumericField.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/NumericField.java Wed Feb 9 09:35:27 2011
@@ -134,8 +134,6 @@ import org.apache.lucene.search.FieldCac
* values are returned as {@link String}s (according to
* <code>toString(value)</code> of the used data type).
*
- * @lucene.experimental
- *
* @since 2.9
*/
public final class NumericField extends AbstractField {
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CheckIndex.java Wed Feb 9 09:35:27 2011
@@ -548,10 +548,10 @@ public class CheckIndex {
if (infoStream != null) {
infoStream.print(" test: field norms.........");
}
- final byte[] b = new byte[reader.maxDoc()];
+ byte[] b;
for (final String fieldName : fieldNames) {
if (reader.hasNorms(fieldName)) {
- reader.norms(fieldName, b, 0);
+ b = reader.norms(fieldName);
++status.totFields;
}
}
@@ -610,6 +610,8 @@ public class CheckIndex {
Comparator<BytesRef> termComp = terms.getComparator();
+ long sumTotalTermFreq = 0;
+
while(true) {
final BytesRef term = terms.next();
@@ -660,6 +662,8 @@ public class CheckIndex {
}
int lastDoc = -1;
+ int docCount = 0;
+ long totalTermFreq = 0;
while(true) {
final int doc = docs2.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
@@ -667,6 +671,8 @@ public class CheckIndex {
}
final int freq = docs2.freq();
status.totPos += freq;
+ totalTermFreq += freq;
+ docCount++;
if (doc <= lastDoc) {
throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
@@ -697,22 +703,39 @@ public class CheckIndex {
}
}
}
+
+ final long totalTermFreq2 = terms.totalTermFreq();
+ final boolean hasTotalTermFreq = postings != null && totalTermFreq2 != -1;
- // Now count how many deleted docs occurred in
- // this term:
-
+ // Re-count if there are deleted docs:
if (reader.hasDeletions()) {
final DocsEnum docsNoDel = terms.docs(null, docs);
- int count = 0;
+ docCount = 0;
+ totalTermFreq = 0;
while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
- count++;
+ docCount++;
+ totalTermFreq += docsNoDel.freq();
}
- if (count != docFreq) {
- throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + count);
+ }
+
+ if (docCount != docFreq) {
+ throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount);
+ }
+ if (hasTotalTermFreq) {
+ sumTotalTermFreq += totalTermFreq;
+ if (totalTermFreq != totalTermFreq2) {
+ throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);
}
}
}
+ if (sumTotalTermFreq != 0) {
+ final long v = fields.terms(field).getSumTotalTermFreq();
+ if (v != -1 && sumTotalTermFreq != v) {
+ throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
+ }
+ }
+
// Test seek to last term:
if (lastTerm != null) {
if (terms.seek(lastTerm) != TermsEnum.SeekStatus.FOUND) {
@@ -779,7 +802,7 @@ public class CheckIndex {
msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
} catch (Throwable e) {
- msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+ msg("ERROR: " + e);
status.error = e;
if (infoStream != null) {
e.printStackTrace(infoStream);
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java Wed Feb 9 09:35:27 2011
@@ -17,16 +17,16 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.LinkedList;
+
+import org.apache.lucene.index.codecs.MergeState;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
-import java.util.LinkedList;
-import java.util.HashSet;
-
-import java.io.IOException;
-
/**
* Combines multiple files into a single compound file.
* The file format:<br>
@@ -80,7 +80,7 @@ final class CompoundFileWriter {
private HashSet<String> ids;
private LinkedList<FileEntry> entries;
private boolean merged = false;
- private SegmentMerger.CheckAbort checkAbort;
+ private MergeState.CheckAbort checkAbort;
/** Create the compound stream in the specified file. The file name is the
* entire name (no extensions are added).
@@ -90,7 +90,7 @@ final class CompoundFileWriter {
this(dir, name, null);
}
- CompoundFileWriter(Directory dir, String name, SegmentMerger.CheckAbort checkAbort) {
+ CompoundFileWriter(Directory dir, String name, MergeState.CheckAbort checkAbort) {
if (dir == null)
throw new NullPointerException("directory cannot be null");
if (name == null)
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java Wed Feb 9 09:35:27 2011
@@ -142,8 +142,12 @@ public class ConcurrentMergeScheduler ex
}
};
- /** Called whenever the running merges have changed, to
- * pause & unpause threads. */
+ /**
+ * Called whenever the running merges have changed, to pause & unpause
+ * threads. This method sorts the merge threads by their merge size in
+ * descending order and then pauses/unpauses threads from first to last --
+ * that way, smaller merges are guaranteed to run before larger ones.
+ */
protected synchronized void updateMergeThreads() {
// Only look at threads that are alive & not in the
@@ -164,6 +168,7 @@ public class ConcurrentMergeScheduler ex
threadIdx++;
}
+ // Sort the merge threads in descending order.
CollectionUtil.mergeSort(activeMerges, compareByMergeDocCount);
int pri = mergeThreadPriority;
@@ -175,12 +180,8 @@ public class ConcurrentMergeScheduler ex
continue;
}
- final boolean doPause;
- if (threadIdx < activeMergeCount-maxThreadCount) {
- doPause = true;
- } else {
- doPause = false;
- }
+ // pause the thread if maxThreadCount is smaller than the number of merge threads.
+ final boolean doPause = threadIdx < activeMergeCount - maxThreadCount;
if (verbose()) {
if (doPause != merge.getPause()) {
@@ -205,13 +206,26 @@ public class ConcurrentMergeScheduler ex
}
}
- private boolean verbose() {
+ /**
+ * Returns true if verbosing is enabled. This method is usually used in
+ * conjunction with {@link #message(String)}, like that:
+ *
+ * <pre>
+ * if (verbose()) {
+ * message("your message");
+ * }
+ * </pre>
+ */
+ protected boolean verbose() {
return writer != null && writer.verbose();
}
- private void message(String message) {
- if (verbose())
- writer.message("CMS: " + message);
+ /**
+ * Outputs the given message - this method assumes {@link #verbose()} was
+ * called and returned true.
+ */
+ protected void message(String message) {
+ writer.message("CMS: " + message);
}
private synchronized void initMergeThreadPriority() {
@@ -231,10 +245,10 @@ public class ConcurrentMergeScheduler ex
/** Wait for any running merge threads to finish */
public void sync() {
- while(true) {
+ while (true) {
MergeThread toSync = null;
- synchronized(this) {
- for(MergeThread t : mergeThreads) {
+ synchronized (this) {
+ for (MergeThread t : mergeThreads) {
if (t.isAlive()) {
toSync = t;
break;
@@ -253,12 +267,14 @@ public class ConcurrentMergeScheduler ex
}
}
- private synchronized int mergeThreadCount() {
+ /**
+ * Returns the number of merge threads that are alive. Note that this number
+ * is ≤ {@link #mergeThreads} size.
+ */
+ protected synchronized int mergeThreadCount() {
int count = 0;
- final int numThreads = mergeThreads.size();
- for(int i=0;i<numThreads;i++) {
- final MergeThread t = mergeThreads.get(i);
- if (t.isAlive() && t.getCurrentMerge() != null) {
+ for (MergeThread mt : mergeThreads) {
+ if (mt.isAlive() && mt.getCurrentMerge() != null) {
count++;
}
}
@@ -266,8 +282,7 @@ public class ConcurrentMergeScheduler ex
}
@Override
- public void merge(IndexWriter writer)
- throws CorruptIndexException, IOException {
+ public void merge(IndexWriter writer) throws IOException {
assert !Thread.holdsLock(writer);
@@ -291,12 +306,33 @@ public class ConcurrentMergeScheduler ex
// Iterate, pulling from the IndexWriter's queue of
// pending merges, until it's empty:
- while(true) {
+ while (true) {
+
+ synchronized(this) {
+ long startStallTime = 0;
+ while (mergeThreadCount() >= 1+maxMergeCount) {
+ startStallTime = System.currentTimeMillis();
+ if (verbose()) {
+ message(" too many merges; stalling...");
+ }
+ try {
+ wait();
+ } catch (InterruptedException ie) {
+ throw new ThreadInterruptedException(ie);
+ }
+ }
+
+ if (verbose()) {
+ if (startStallTime != 0) {
+ message(" stalled for " + (System.currentTimeMillis()-startStallTime) + " msec");
+ }
+ }
+ }
+
// TODO: we could be careful about which merges to do in
// the BG (eg maybe the "biggest" ones) vs FG, which
// merges to do first (the easiest ones?), etc.
-
MergePolicy.OneMerge merge = writer.getNextMerge();
if (merge == null) {
if (verbose())
@@ -311,32 +347,11 @@ public class ConcurrentMergeScheduler ex
boolean success = false;
try {
synchronized(this) {
- final MergeThread merger;
- long startStallTime = 0;
- while (mergeThreadCount() >= maxMergeCount) {
- startStallTime = System.currentTimeMillis();
- if (verbose()) {
- message(" too many merges; stalling...");
- }
- try {
- wait();
- } catch (InterruptedException ie) {
- throw new ThreadInterruptedException(ie);
- }
- }
-
- if (verbose()) {
- if (startStallTime != 0) {
- message(" stalled for " + (System.currentTimeMillis()-startStallTime) + " msec");
- }
- message(" consider merge " + merge.segString(dir));
- }
-
- assert mergeThreadCount() < maxMergeCount;
+ message(" consider merge " + merge.segString(dir));
// OK to spawn a new merge thread to handle this
// merge:
- merger = getMergeThread(writer, merge);
+ final MergeThread merger = getMergeThread(writer, merge);
mergeThreads.add(merger);
if (verbose()) {
message(" launch new thread [" + merger.getName() + "]");
@@ -360,8 +375,7 @@ public class ConcurrentMergeScheduler ex
}
/** Does the actual merge, by calling {@link IndexWriter#merge} */
- protected void doMerge(MergePolicy.OneMerge merge)
- throws IOException {
+ protected void doMerge(MergePolicy.OneMerge merge) throws IOException {
writer.merge(merge);
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java Wed Feb 9 09:35:27 2011
@@ -27,6 +27,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
@@ -35,10 +36,8 @@ import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.BytesRef;
-
-import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
+import org.apache.lucene.util.MapBackedSet;
/**
* An IndexReader which reads indexes with multiple segments.
@@ -60,8 +59,8 @@ class DirectoryReader extends IndexReade
private boolean rollbackHasChanges;
private SegmentReader[] subReaders;
+ private ReaderContext topLevelReaderContext;
private int[] starts; // 1st docno for each segment
- private final Map<SegmentReader,ReaderUtil.Slice> subReaderToSlice = new HashMap<SegmentReader,ReaderUtil.Slice>();
private int maxDoc = 0;
private int numDocs = -1;
private boolean hasDeletions = false;
@@ -71,6 +70,8 @@ class DirectoryReader extends IndexReade
// opened on a past IndexCommit:
private long maxIndexVersion;
+ private final boolean applyAllDeletes;
+
// static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly,
// final int termInfosIndexDivisor) throws CorruptIndexException, IOException {
// return open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor, null);
@@ -107,6 +108,8 @@ class DirectoryReader extends IndexReade
} else {
this.codecs = codecs;
}
+ readerFinishedListeners = new MapBackedSet<ReaderFinishedListener>(new ConcurrentHashMap<ReaderFinishedListener,Boolean>());
+ applyAllDeletes = false;
// To reduce the chance of hitting FileNotFound
// (and having to retry), we open segments in
@@ -118,6 +121,7 @@ class DirectoryReader extends IndexReade
boolean success = false;
try {
readers[i] = SegmentReader.get(readOnly, sis.info(i), termInfosIndexDivisor);
+ readers[i].readerFinishedListeners = readerFinishedListeners;
success = true;
} finally {
if (!success) {
@@ -137,9 +141,11 @@ class DirectoryReader extends IndexReade
}
// Used by near real-time search
- DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, CodecProvider codecs) throws IOException {
+ DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, CodecProvider codecs, boolean applyAllDeletes) throws IOException {
this.directory = writer.getDirectory();
this.readOnly = true;
+ this.applyAllDeletes = applyAllDeletes; // saved for reopen
+
segmentInfos = (SegmentInfos) infos.clone();// make sure we clone otherwise we share mutable state with IW
this.termInfosIndexDivisor = termInfosIndexDivisor;
if (codecs == null) {
@@ -147,6 +153,7 @@ class DirectoryReader extends IndexReade
} else {
this.codecs = codecs;
}
+ readerFinishedListeners = writer.getReaderFinishedListeners();
// IndexWriter synchronizes externally before calling
// us, which ensures infos will not change; so there's
@@ -161,6 +168,7 @@ class DirectoryReader extends IndexReade
final SegmentInfo info = infos.info(i);
assert info.dir == dir;
readers[i] = writer.readerPool.getReadOnlyClone(info, true, termInfosIndexDivisor);
+ readers[i].readerFinishedListeners = readerFinishedListeners;
success = true;
} finally {
if (!success) {
@@ -183,11 +191,15 @@ class DirectoryReader extends IndexReade
/** This constructor is only used for {@link #reopen()} */
DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts,
- boolean readOnly, boolean doClone, int termInfosIndexDivisor, CodecProvider codecs) throws IOException {
+ boolean readOnly, boolean doClone, int termInfosIndexDivisor, CodecProvider codecs,
+ Collection<ReaderFinishedListener> readerFinishedListeners) throws IOException {
this.directory = directory;
this.readOnly = readOnly;
this.segmentInfos = infos;
this.termInfosIndexDivisor = termInfosIndexDivisor;
+ this.readerFinishedListeners = readerFinishedListeners;
+ applyAllDeletes = false;
+
if (codecs == null) {
this.codecs = CodecProvider.getDefault();
} else {
@@ -233,8 +245,10 @@ class DirectoryReader extends IndexReade
// this is a new reader; in case we hit an exception we can close it safely
newReader = SegmentReader.get(readOnly, infos.info(i), termInfosIndexDivisor);
+ newReader.readerFinishedListeners = readerFinishedListeners;
} else {
newReader = newReaders[i].reopenSegment(infos.info(i), doClone, readOnly);
+ assert newReader.readerFinishedListeners == readerFinishedListeners;
}
if (newReader == newReaders[i]) {
// this reader will be shared between the old and the new one,
@@ -300,25 +314,22 @@ class DirectoryReader extends IndexReade
private void initialize(SegmentReader[] subReaders) throws IOException {
this.subReaders = subReaders;
starts = new int[subReaders.length + 1]; // build starts array
-
+ final AtomicReaderContext[] subReaderCtx = new AtomicReaderContext[subReaders.length];
+ topLevelReaderContext = new CompositeReaderContext(this, subReaderCtx, subReaderCtx);
final List<Fields> subFields = new ArrayList<Fields>();
- final List<ReaderUtil.Slice> fieldSlices = new ArrayList<ReaderUtil.Slice>();
-
+
for (int i = 0; i < subReaders.length; i++) {
starts[i] = maxDoc;
+ subReaderCtx[i] = new AtomicReaderContext(topLevelReaderContext, subReaders[i], i, maxDoc, i, maxDoc);
maxDoc += subReaders[i].maxDoc(); // compute maxDocs
if (subReaders[i].hasDeletions()) {
hasDeletions = true;
}
-
- final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i], subReaders[i].maxDoc(), i);
- subReaderToSlice.put(subReaders[i], slice);
-
+
final Fields f = subReaders[i].fields();
if (f != null) {
subFields.add(f);
- fieldSlices.add(slice);
}
}
starts[subReaders.length] = maxDoc;
@@ -361,6 +372,7 @@ class DirectoryReader extends IndexReade
writeLock = null;
hasChanges = false;
}
+ assert newReader.readerFinishedListeners != null;
return newReader;
}
@@ -395,7 +407,9 @@ class DirectoryReader extends IndexReade
// TODO: right now we *always* make a new reader; in
// the future we could have write make some effort to
// detect that no changes have occurred
- return writer.getReader();
+ IndexReader reader = writer.getReader(applyAllDeletes);
+ reader.readerFinishedListeners = readerFinishedListeners;
+ return reader;
}
private IndexReader doReopen(final boolean openReadOnly, IndexCommit commit) throws CorruptIndexException, IOException {
@@ -462,7 +476,7 @@ class DirectoryReader extends IndexReade
private synchronized DirectoryReader doReopen(SegmentInfos infos, boolean doClone, boolean openReadOnly) throws CorruptIndexException, IOException {
DirectoryReader reader;
- reader = new DirectoryReader(directory, infos, subReaders, starts, openReadOnly, doClone, termInfosIndexDivisor, codecs);
+ reader = new DirectoryReader(directory, infos, subReaders, starts, openReadOnly, doClone, termInfosIndexDivisor, codecs, readerFinishedListeners);
return reader;
}
@@ -606,12 +620,6 @@ class DirectoryReader extends IndexReade
}
@Override
- public synchronized void norms(String field, byte[] result, int offset)
- throws IOException {
- throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms");
- }
-
- @Override
protected void doSetNorm(int n, String field, byte value)
throws CorruptIndexException, IOException {
int i = readerIndex(n); // find segment num
@@ -715,11 +723,18 @@ class DirectoryReader extends IndexReade
// case we have to roll back:
startCommit();
+ final SegmentInfos rollbackSegmentInfos = new SegmentInfos();
+ rollbackSegmentInfos.addAll(segmentInfos);
+
boolean success = false;
try {
for (int i = 0; i < subReaders.length; i++)
subReaders[i].commit();
+ // Remove segments that contain only 100% deleted
+ // docs:
+ segmentInfos.pruneDeletedSegments();
+
// Sync all files we just wrote
directory.sync(segmentInfos.files(directory, false));
segmentInfos.commit(directory);
@@ -739,6 +754,10 @@ class DirectoryReader extends IndexReade
// partially written .del files, etc, are
// removed):
deleter.refresh();
+
+ // Restore all SegmentInfos (in case we pruned some)
+ segmentInfos.clear();
+ segmentInfos.addAll(rollbackSegmentInfos);
}
}
@@ -815,11 +834,6 @@ class DirectoryReader extends IndexReade
}
}
- // NOTE: only needed in case someone had asked for
- // FieldCache for top-level reader (which is generally
- // not a good idea):
- FieldCache.DEFAULT.purge(this);
-
if (writer != null) {
// Since we just closed, writer may now be able to
// delete unused files:
@@ -844,16 +858,16 @@ class DirectoryReader extends IndexReade
fieldSet.addAll(names);
}
return fieldSet;
- }
+ }
@Override
- public IndexReader[] getSequentialSubReaders() {
- return subReaders;
+ public ReaderContext getTopReaderContext() {
+ return topLevelReaderContext;
}
-
+
@Override
- public int getSubReaderDocBase(IndexReader subReader) {
- return subReaderToSlice.get(subReader).start;
+ public IndexReader[] getSequentialSubReaders() {
+ return subReaders;
}
/** Returns the directory this index resides in. */
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java Wed Feb 9 09:35:27 2011
@@ -63,8 +63,6 @@ final class DocInverterPerField extends
fieldState.reset(docState.doc.getBoost());
- final int maxFieldLength = docState.maxFieldLength;
-
final boolean doInvert = consumer.start(fields, count);
for(int i=0;i<count;i++) {
@@ -171,12 +169,8 @@ final class DocInverterPerField extends
if (!success)
docState.docWriter.setAborting();
}
+ fieldState.length++;
fieldState.position++;
- if (++fieldState.length >= maxFieldLength) {
- if (docState.infoStream != null)
- docState.infoStream.println("maxFieldLength " +maxFieldLength+ " reached for field " + fieldInfo.name + ", ignoring following tokens");
- break;
- }
hasMoreTokens = stream.incrementToken();
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Wed Feb 9 09:35:27 2011
@@ -30,14 +30,16 @@ import java.util.concurrent.atomic.Atomi
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMFile;
import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BitVector;
+import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.RecyclingByteBlockAllocator;
import org.apache.lucene.util.ThreadInterruptedException;
-import org.apache.lucene.util.RamUsageEstimator;
+
import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
@@ -127,22 +129,21 @@ final class DocumentsWriter {
private boolean aborting; // True if an abort is pending
PrintStream infoStream;
- int maxFieldLength = IndexWriterConfig.UNLIMITED_FIELD_LENGTH;
- Similarity similarity;
+ SimilarityProvider similarityProvider;
// max # simultaneous threads; if there are more than
// this, they wait for others to finish first
private final int maxThreadStates;
+ // TODO: cutover to BytesRefHash
// Deletes for our still-in-RAM (to be flushed next) segment
- private SegmentDeletes pendingDeletes = new SegmentDeletes();
+ private BufferedDeletes pendingDeletes = new BufferedDeletes(false);
static class DocState {
DocumentsWriter docWriter;
Analyzer analyzer;
- int maxFieldLength;
PrintStream infoStream;
- Similarity similarity;
+ SimilarityProvider similarityProvider;
int docID;
Document doc;
String maxTermPrefix;
@@ -191,6 +192,7 @@ final class DocumentsWriter {
/**
* Allocate bytes used from shared pool.
*/
+ @Override
protected byte[] newBuffer(int size) {
assert size == PER_DOC_BLOCK_SIZE;
return perDocAllocator.getByteBlock();
@@ -279,16 +281,16 @@ final class DocumentsWriter {
private boolean closed;
private final FieldInfos fieldInfos;
- private final BufferedDeletes bufferedDeletes;
+ private final BufferedDeletesStream bufferedDeletesStream;
private final IndexWriter.FlushControl flushControl;
- DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain, int maxThreadStates, FieldInfos fieldInfos, BufferedDeletes bufferedDeletes) throws IOException {
+ DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain, int maxThreadStates, FieldInfos fieldInfos, BufferedDeletesStream bufferedDeletesStream) throws IOException {
this.directory = directory;
this.writer = writer;
- this.similarity = writer.getConfig().getSimilarity();
+ this.similarityProvider = writer.getConfig().getSimilarityProvider();
this.maxThreadStates = maxThreadStates;
this.fieldInfos = fieldInfos;
- this.bufferedDeletes = bufferedDeletes;
+ this.bufferedDeletesStream = bufferedDeletesStream;
flushControl = writer.flushControl;
consumer = indexingChain.getChain(this);
@@ -337,6 +339,9 @@ final class DocumentsWriter {
return doFlush;
}
+ // TODO: we could check w/ FreqProxTermsWriter: if the
+ // term doesn't exist, don't bother buffering into the
+ // per-DWPT map (but still must go into the global map)
boolean deleteTerm(Term term, boolean skipWait) {
final boolean doFlush = flushControl.waitUpdate(0, 1, skipWait);
synchronized(this) {
@@ -358,17 +363,10 @@ final class DocumentsWriter {
}
}
- synchronized void setMaxFieldLength(int maxFieldLength) {
- this.maxFieldLength = maxFieldLength;
+ synchronized void setSimilarityProvider(SimilarityProvider similarity) {
+ this.similarityProvider = similarity;
for(int i=0;i<threadStates.length;i++) {
- threadStates[i].docState.maxFieldLength = maxFieldLength;
- }
- }
-
- synchronized void setSimilarity(Similarity similarity) {
- this.similarity = similarity;
- for(int i=0;i<threadStates.length;i++) {
- threadStates[i].docState.similarity = similarity;
+ threadStates[i].docState.similarityProvider = similarity;
}
}
@@ -509,23 +507,26 @@ final class DocumentsWriter {
}
// for testing
- public SegmentDeletes getPendingDeletes() {
+ public BufferedDeletes getPendingDeletes() {
return pendingDeletes;
}
private void pushDeletes(SegmentInfo newSegment, SegmentInfos segmentInfos) {
// Lock order: DW -> BD
+ final long delGen = bufferedDeletesStream.getNextGen();
if (pendingDeletes.any()) {
- if (newSegment != null) {
+ if (segmentInfos.size() > 0 || newSegment != null) {
+ final FrozenBufferedDeletes packet = new FrozenBufferedDeletes(pendingDeletes, delGen);
if (infoStream != null) {
- message("flush: push buffered deletes to newSegment");
+ message("flush: push buffered deletes");
}
- bufferedDeletes.pushDeletes(pendingDeletes, newSegment);
- } else if (segmentInfos.size() > 0) {
+ bufferedDeletesStream.push(packet);
if (infoStream != null) {
- message("flush: push buffered deletes to previously flushed segment " + segmentInfos.lastElement());
+ message("flush: delGen=" + packet.gen);
+ }
+ if (newSegment != null) {
+ newSegment.setBufferedDeletesGen(packet.gen);
}
- bufferedDeletes.pushDeletes(pendingDeletes, segmentInfos.lastElement(), true);
} else {
if (infoStream != null) {
message("flush: drop buffered deletes: no segments");
@@ -534,7 +535,9 @@ final class DocumentsWriter {
// there are no segments, the deletions cannot
// affect anything.
}
- pendingDeletes = new SegmentDeletes();
+ pendingDeletes.clear();
+ } else if (newSegment != null) {
+ newSegment.setBufferedDeletesGen(delGen);
}
}
@@ -546,6 +549,8 @@ final class DocumentsWriter {
// Lock order: IW -> DW
synchronized SegmentInfo flush(IndexWriter writer, IndexFileDeleter deleter, MergePolicy mergePolicy, SegmentInfos segmentInfos) throws IOException {
+ final long startTime = System.currentTimeMillis();
+
// We change writer's segmentInfos:
assert Thread.holdsLock(writer);
@@ -583,6 +588,18 @@ final class DocumentsWriter {
final SegmentWriteState flushState = segWriteState();
+ // Apply delete-by-docID now (delete-by-docID only
+ // happens when an exception is hit processing that
+ // doc, eg if analyzer has some problem w/ the text):
+ if (pendingDeletes.docIDs.size() > 0) {
+ flushState.deletedDocs = new BitVector(numDocs);
+ for(int delDocID : pendingDeletes.docIDs) {
+ flushState.deletedDocs.set(delDocID);
+ }
+ pendingDeletes.bytesUsed.addAndGet(-pendingDeletes.docIDs.size() * BufferedDeletes.BYTES_PER_DEL_DOCID);
+ pendingDeletes.docIDs.clear();
+ }
+
newSegment = new SegmentInfo(segment, numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false);
Collection<DocConsumerPerThread> threads = new HashSet<DocConsumerPerThread>();
@@ -593,10 +610,14 @@ final class DocumentsWriter {
double startMBUsed = bytesUsed()/1024./1024.;
consumer.flush(threads, flushState);
+
newSegment.setHasVectors(flushState.hasVectors);
if (infoStream != null) {
message("new segment has " + (flushState.hasVectors ? "vectors" : "no vectors"));
+ if (flushState.deletedDocs != null) {
+ message("new segment has " + flushState.deletedDocs.count() + " deleted docs");
+ }
message("flushedFiles=" + newSegment.files());
message("flushed codecs=" + newSegment.getSegmentCodecs());
}
@@ -617,6 +638,30 @@ final class DocumentsWriter {
newSegment.setUseCompoundFile(true);
}
+ // Must write deleted docs after the CFS so we don't
+ // slurp the del file into CFS:
+ if (flushState.deletedDocs != null) {
+ final int delCount = flushState.deletedDocs.count();
+ assert delCount > 0;
+ newSegment.setDelCount(delCount);
+ newSegment.advanceDelGen();
+ final String delFileName = newSegment.getDelFileName();
+ boolean success2 = false;
+ try {
+ flushState.deletedDocs.write(directory, delFileName);
+ success2 = true;
+ } finally {
+ if (!success2) {
+ try {
+ directory.deleteFile(delFileName);
+ } catch (Throwable t) {
+ // suppress this so we keep throwing the
+ // original exception
+ }
+ }
+ }
+ }
+
if (infoStream != null) {
message("flush: segment=" + newSegment);
final double newSegmentSizeNoStore = newSegment.sizeInBytes(false)/1024./1024.;
@@ -643,6 +688,9 @@ final class DocumentsWriter {
// Lock order: IW -> DW -> BD
pushDeletes(newSegment, segmentInfos);
+ if (infoStream != null) {
+ message("flush time " + (System.currentTimeMillis()-startTime) + " msec");
+ }
return newSegment;
}
@@ -650,7 +698,7 @@ final class DocumentsWriter {
SegmentWriteState segWriteState() {
return new SegmentWriteState(infoStream, directory, segment, fieldInfos,
numDocs, writer.getConfig().getTermIndexInterval(),
- SegmentCodecs.build(fieldInfos, writer.codecs), bytesUsed);
+ SegmentCodecs.build(fieldInfos, writer.codecs), pendingDeletes, bytesUsed);
}
synchronized void close() {
@@ -909,8 +957,7 @@ final class DocumentsWriter {
final static int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK;
/* if you increase this, you must fix field cache impl for
- * getTerms/getTermsIndex requires <= 32768. Also fix
- * DeltaBytesWriter's TERM_EOF if necessary. */
+ * getTerms/getTermsIndex requires <= 32768. */
final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2;
/* Initial chunks size of the shared int[] blocks used to
@@ -971,7 +1018,7 @@ final class DocumentsWriter {
final boolean doBalance;
final long deletesRAMUsed;
- deletesRAMUsed = bufferedDeletes.bytesUsed();
+ deletesRAMUsed = bufferedDeletesStream.bytesUsed();
synchronized(this) {
if (ramBufferSize == IndexWriterConfig.DISABLE_AUTO_FLUSH || bufferIsFull) {
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java Wed Feb 9 09:35:27 2011
@@ -35,9 +35,8 @@ final class DocumentsWriterThreadState {
public DocumentsWriterThreadState(DocumentsWriter docWriter) throws IOException {
this.docWriter = docWriter;
docState = new DocumentsWriter.DocState();
- docState.maxFieldLength = docWriter.maxFieldLength;
docState.infoStream = docWriter.infoStream;
- docState.similarity = docWriter.similarity;
+ docState.similarityProvider = docWriter.similarityProvider;
docState.docWriter = docWriter;
consumer = docWriter.consumer.addThread(this);
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java Wed Feb 9 09:35:27 2011
@@ -56,7 +56,7 @@ public final class FieldInfo {
this.storeOffsetWithTermVector = false;
this.storePositionWithTermVector = false;
this.storePayloads = false;
- this.omitNorms = true;
+ this.omitNorms = false;
this.omitTermFreqAndPositions = false;
}
}
@@ -86,7 +86,7 @@ public final class FieldInfo {
this.storePayloads = true;
}
if (this.omitNorms != omitNorms) {
- this.omitNorms = false; // once norms are stored, always store
+ this.omitNorms = true; // if one requires omitNorms at least once, it remains off for life
}
if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) {
this.omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java Wed Feb 9 09:35:27 2011
@@ -284,14 +284,21 @@ public final class FieldInfos {
}
public boolean hasVectors() {
- boolean hasVectors = false;
for (int i = 0; i < size(); i++) {
if (fieldInfo(i).storeTermVector) {
- hasVectors = true;
- break;
+ return true;
}
}
- return hasVectors;
+ return false;
+ }
+
+ public boolean hasNorms() {
+ for (int i = 0; i < size(); i++) {
+ if (!fieldInfo(i).omitNorms) {
+ return true;
+ }
+ }
+ return false;
}
public void write(Directory d, String name) throws IOException {
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInvertState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInvertState.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInvertState.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInvertState.java Wed Feb 9 09:35:27 2011
@@ -30,6 +30,7 @@ public final class FieldInvertState {
int length;
int numOverlap;
int offset;
+ int maxTermFrequency;
float boost;
AttributeSource attributeSource;
@@ -53,6 +54,7 @@ public final class FieldInvertState {
length = 0;
numOverlap = 0;
offset = 0;
+ maxTermFrequency = 0;
boost = docBoost;
attributeSource = null;
}
@@ -73,6 +75,10 @@ public final class FieldInvertState {
return length;
}
+ public void setLength(int length) {
+ this.length = length;
+ }
+
/**
* Get the number of terms with <code>positionIncrement == 0</code>.
* @return the numOverlap
@@ -81,6 +87,10 @@ public final class FieldInvertState {
return numOverlap;
}
+ public void setNumOverlap(int numOverlap) {
+ this.numOverlap = numOverlap;
+ }
+
/**
* Get end offset of the last processed term.
* @return the offset
@@ -99,6 +109,19 @@ public final class FieldInvertState {
return boost;
}
+ public void setBoost(float boost) {
+ this.boost = boost;
+ }
+
+ /**
+ * Get the maximum term-frequency encountered for any term in the field. A
+ * field containing "the quick brown fox jumps over the lazy dog" would have
+ * a value of 2, because "the" appears twice.
+ */
+ public int getMaxTermFrequency() {
+ return maxTermFrequency;
+ }
+
public AttributeSource getAttributeSource() {
return attributeSource;
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/Fields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/Fields.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/Fields.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/Fields.java Wed Feb 9 09:35:27 2011
@@ -30,7 +30,7 @@ public abstract class Fields {
* names. This will not return null. */
public abstract FieldsEnum iterator() throws IOException;
- /** Get the {@link Terms} for this field. This may return
+ /** Get the {@link Terms} for this field. This will return
* null if the field does not exist. */
public abstract Terms terms(String field) throws IOException;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldsReader.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldsReader.java Wed Feb 9 09:35:27 2011
@@ -37,8 +37,10 @@ import java.io.Reader;
* Class responsible for access to stored document fields.
* <p/>
* It uses <segment>.fdt and <segment>.fdx; files.
+ *
+ * @lucene.internal
*/
-final class FieldsReader implements Cloneable {
+public final class FieldsReader implements Cloneable {
private final static int FORMAT_SIZE = 4;
private final FieldInfos fieldInfos;
@@ -74,6 +76,23 @@ final class FieldsReader implements Clon
ensureOpen();
return new FieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, cloneableFieldsStream, cloneableIndexStream);
}
+
+ /** Verifies that the code version which wrote the segment is supported. */
+ public static void checkCodeVersion(Directory dir, String segment) throws IOException {
+ final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", IndexFileNames.FIELDS_INDEX_EXTENSION);
+ IndexInput idxStream = dir.openInput(indexStreamFN, 1024);
+
+ try {
+ int format = idxStream.readInt();
+ if (format < FieldsWriter.FORMAT_MINIMUM)
+ throw new IndexFormatTooOldException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT);
+ if (format > FieldsWriter.FORMAT_CURRENT)
+ throw new IndexFormatTooNewException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT);
+ } finally {
+ idxStream.close();
+ }
+
+ }
// Used only by clone
private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
@@ -89,11 +108,11 @@ final class FieldsReader implements Clon
indexStream = (IndexInput) cloneableIndexStream.clone();
}
- FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
+ public FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0);
}
- FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException {
+ public FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException {
boolean success = false;
isOriginal = true;
try {
@@ -157,7 +176,7 @@ final class FieldsReader implements Clon
*
* @throws IOException
*/
- final void close() throws IOException {
+ public final void close() throws IOException {
if (!closed) {
if (fieldsStream != null) {
fieldsStream.close();
@@ -178,7 +197,7 @@ final class FieldsReader implements Clon
}
}
- final int size() {
+ public final int size() {
return size;
}
@@ -186,7 +205,7 @@ final class FieldsReader implements Clon
indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L);
}
- final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+ public final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
seekIndex(n);
long position = indexStream.readLong();
fieldsStream.seek(position);
@@ -237,7 +256,7 @@ final class FieldsReader implements Clon
* contiguous range of length numDocs starting with
* startDocID. Returns the IndexInput (the fieldStream),
* already seeked to the starting point for startDocID.*/
- final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException {
+ public final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException {
seekIndex(startDocID);
long startOffset = indexStream.readLong();
long lastOffset = startOffset;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Wed Feb 9 09:35:27 2011
@@ -19,17 +19,19 @@ package org.apache.lucene.index;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.index.values.DocValues;
import org.apache.lucene.index.values.DocValuesEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.MapBackedSet;
import java.io.IOException;
import java.util.Collection;
import java.util.Map;
import java.util.Comparator;
+import java.util.concurrent.ConcurrentHashMap;
/** A <code>FilterIndexReader</code> contains another IndexReader, which it
* uses as its basic source of data, possibly transforming the data along the
@@ -105,6 +107,11 @@ public class FilterIndexReader extends I
public long getUniqueTermCount() throws IOException {
return in.getUniqueTermCount();
}
+
+ @Override
+ public long getSumTotalTermFreq() throws IOException {
+ return in.getSumTotalTermFreq();
+ }
}
/** Base class for filtering {@link TermsEnum} implementations. */
@@ -142,11 +149,6 @@ public class FilterIndexReader extends I
}
@Override
- public void cacheCurrentTerm() throws IOException {
- in.cacheCurrentTerm();
- }
-
- @Override
public SeekStatus seek(long ord) throws IOException {
return in.seek(ord);
}
@@ -167,11 +169,16 @@ public class FilterIndexReader extends I
}
@Override
- public int docFreq() {
+ public int docFreq() throws IOException {
return in.docFreq();
}
@Override
+ public long totalTermFreq() throws IOException {
+ return in.totalTermFreq();
+ }
+
+ @Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
return in.docs(skipDocs, reuse);
}
@@ -185,6 +192,16 @@ public class FilterIndexReader extends I
public Comparator<BytesRef> getComparator() throws IOException {
return in.getComparator();
}
+
+ @Override
+ public void seek(BytesRef term, TermState state) throws IOException {
+ in.seek(term, state);
+ }
+
+ @Override
+ public TermState termState() throws IOException {
+ return in.termState();
+ }
}
/** Base class for filtering {@link DocsEnum} implementations. */
@@ -282,6 +299,7 @@ public class FilterIndexReader extends I
public FilterIndexReader(IndexReader in) {
super();
this.in = in;
+ readerFinishedListeners = new MapBackedSet<ReaderFinishedListener>(new ConcurrentHashMap<ReaderFinishedListener,Boolean>());
}
@Override
@@ -362,12 +380,6 @@ public class FilterIndexReader extends I
}
@Override
- public void norms(String f, byte[] bytes, int offset) throws IOException {
- ensureOpen();
- in.norms(f, bytes, offset);
- }
-
- @Override
protected void doSetNorm(int d, String f, byte b) throws CorruptIndexException, IOException {
in.setNorm(d, f, b);
}
@@ -393,11 +405,6 @@ public class FilterIndexReader extends I
@Override
protected void doClose() throws IOException {
in.close();
-
- // NOTE: only needed in case someone had asked for
- // FieldCache for top-level reader (which is generally
- // not a good idea):
- FieldCache.DEFAULT.purge(this);
}
@@ -429,6 +436,11 @@ public class FilterIndexReader extends I
public IndexReader[] getSequentialSubReaders() {
return in.getSequentialSubReaders();
}
+
+ @Override
+ public ReaderContext getTopReaderContext() {
+ return in.getTopReaderContext();
+ }
@Override
public Fields fields() throws IOException {
@@ -451,4 +463,16 @@ public class FilterIndexReader extends I
buffer.append(')');
return buffer.toString();
}
-}
\ No newline at end of file
+
+ @Override
+ public void addReaderFinishedListener(ReaderFinishedListener listener) {
+ super.addReaderFinishedListener(listener);
+ in.addReaderFinishedListener(listener);
+ }
+
+ @Override
+ public void removeReaderFinishedListener(ReaderFinishedListener listener) {
+ super.removeReaderFinishedListener(listener);
+ in.removeReaderFinishedListener(listener);
+ }
+}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java Wed Feb 9 09:35:27 2011
@@ -20,13 +20,15 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Comparator;
import java.util.List;
import java.util.Map;
-import java.util.Comparator;
-import org.apache.lucene.index.codecs.PostingsConsumer;
import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.PostingsConsumer;
+import org.apache.lucene.index.codecs.TermStats;
import org.apache.lucene.index.codecs.TermsConsumer;
+import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CollectionUtil;
@@ -107,7 +109,7 @@ final class FreqProxTermsWriter extends
// If this field has postings then add them to the
// segment
- appendPostings(fields, consumer);
+ appendPostings(fieldName, state, fields, consumer);
for(int i=0;i<fields.length;i++) {
TermsHashPerField perField = fields[i].termsHashPerField;
@@ -132,7 +134,8 @@ final class FreqProxTermsWriter extends
/* Walk through all unique text tokens (Posting
* instances) found in this field and serialize them
* into a single RAM segment. */
- void appendPostings(FreqProxTermsWriterPerField[] fields,
+ void appendPostings(String fieldName, SegmentWriteState state,
+ FreqProxTermsWriterPerField[] fields,
FieldsConsumer consumer)
throws CorruptIndexException, IOException {
@@ -155,16 +158,26 @@ final class FreqProxTermsWriter extends
assert result;
}
+ final Term protoTerm = new Term(fieldName);
+
FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields];
final boolean currentFieldOmitTermFreqAndPositions = fields[0].fieldInfo.omitTermFreqAndPositions;
//System.out.println("flush terms field=" + fields[0].fieldInfo.name);
+ final Map<Term,Integer> segDeletes;
+ if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
+ segDeletes = state.segDeletes.terms;
+ } else {
+ segDeletes = null;
+ }
+
// TODO: really TermsHashPerField should take over most
// of this loop, including merge sort of terms from
// multiple threads and interacting with the
// TermsConsumer, only calling out to us (passing us the
// DocsConsumer) to handle delivery of docs/positions
+ long sumTotalTermFreq = 0;
while(numFields > 0) {
// Get the next term to merge
@@ -193,10 +206,23 @@ final class FreqProxTermsWriter extends
final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text);
+ final int delDocLimit;
+ if (segDeletes != null) {
+ final Integer docIDUpto = segDeletes.get(protoTerm.createTerm(text));
+ if (docIDUpto != null) {
+ delDocLimit = docIDUpto;
+ } else {
+ delDocLimit = 0;
+ }
+ } else {
+ delDocLimit = 0;
+ }
+
// Now termStates has numToMerge FieldMergeStates
// which all share the same term. Now we must
// interleave the docID streams.
int numDocs = 0;
+ long totTF = 0;
while(numToMerge > 0) {
FreqProxFieldMergeState minState = termStates[0];
@@ -211,7 +237,28 @@ final class FreqProxTermsWriter extends
assert minState.docID < flushedDocCount: "doc=" + minState.docID + " maxDoc=" + flushedDocCount;
+ // NOTE: we could check here if the docID was
+ // deleted, and skip it. However, this is somewhat
+ // dangerous because it can yield non-deterministic
+ // behavior since we may see the docID before we see
+ // the term that caused it to be deleted. This
+ // would mean some (but not all) of its postings may
+ // make it into the index, which'd alter the docFreq
+ // for those terms. We could fix this by doing two
+ // passes, ie first sweep marks all del docs, and
+ // 2nd sweep does the real flush, but I suspect
+ // that'd add too much time to flush.
+
postingsConsumer.startDoc(minState.docID, termDocFreq);
+ if (minState.docID < delDocLimit) {
+ // Mark it deleted. TODO: we could also skip
+ // writing its postings; this would be
+ // deterministic (just for this Term's docs).
+ if (state.deletedDocs == null) {
+ state.deletedDocs = new BitVector(state.numDocs);
+ }
+ state.deletedDocs.set(minState.docID);
+ }
final ByteSliceReader prox = minState.prox;
@@ -222,6 +269,7 @@ final class FreqProxTermsWriter extends
// omitTermFreqAndPositions == false so we do write positions &
// payload
int position = 0;
+ totTF += termDocFreq;
for(int j=0;j<termDocFreq;j++) {
final int code = prox.readVInt();
position += code >> 1;
@@ -286,9 +334,10 @@ final class FreqProxTermsWriter extends
}
assert numDocs > 0;
- termsConsumer.finishTerm(text, numDocs);
+ termsConsumer.finishTerm(text, new TermStats(numDocs, totTF));
+ sumTotalTermFreq += totTF;
}
- termsConsumer.finish();
+ termsConsumer.finish(sumTotalTermFreq);
}
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Wed Feb 9 09:35:27 2011
@@ -125,6 +125,7 @@ final class FreqProxTermsWriterPerField
postings.docFreqs[termID] = 1;
writeProx(termID, fieldState.position);
}
+ fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
}
@Override
@@ -158,11 +159,12 @@ final class FreqProxTermsWriterPerField
termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
}
postings.docFreqs[termID] = 1;
+ fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
postings.lastDocIDs[termID] = docState.docID;
writeProx(termID, fieldState.position);
} else {
- postings.docFreqs[termID]++;
+ fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
}
}
@@ -192,6 +194,7 @@ final class FreqProxTermsWriterPerField
return new FreqProxPostingsArray(size);
}
+ @Override
void copyTo(ParallelPostingsArray toArray, int numToCopy) {
assert toArray instanceof FreqProxPostingsArray;
FreqProxPostingsArray to = (FreqProxPostingsArray) toArray;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexFileNames.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexFileNames.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexFileNames.java Wed Feb 9 09:35:27 2011
@@ -204,7 +204,7 @@ public final class IndexFileNames {
/**
* Returns true if the given filename ends with the given extension. One
- * should provide a <i>pure</i> extension, withouth '.'.
+ * should provide a <i>pure</i> extension, without '.'.
*/
public static boolean matchesExtension(String filename, String ext) {
// It doesn't make a difference whether we allocate a StringBuilder ourself
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexFormatTooOldException.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexFormatTooOldException.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexFormatTooOldException.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexFormatTooOldException.java Wed Feb 9 09:35:27 2011
@@ -23,10 +23,15 @@ package org.apache.lucene.index;
*/
public class IndexFormatTooOldException extends CorruptIndexException {
+ public IndexFormatTooOldException(String filename, String version) {
+ super("Format version is not supported" + (filename!=null ? (" in file '" + filename + "'") : "") +
+ ": " + version + ". This version of Lucene only supports indexes created with release 3.0 and later.");
+ }
+
public IndexFormatTooOldException(String filename, int version, int minVersion, int maxVersion) {
super("Format version is not supported" + (filename!=null ? (" in file '" + filename + "'") : "") +
- ": " + version + " (needs to be between " + minVersion + " and " + maxVersion +
- "). This version of Lucene only supports indexes created with release 3.0 and later.");
+ ": " + version + " (needs to be between " + minVersion + " and " + maxVersion +
+ "). This version of Lucene only supports indexes created with release 3.0 and later.");
}
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java?rev=1068809&r1=1068808&r2=1068809&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java Wed Feb 9 09:35:27 2011
@@ -21,7 +21,7 @@ import java.io.FileNotFoundException;
/**
* Signals that no index was found in the Directory. Possibly because the
- * directory is empty, however can slso indicate an index corruption.
+ * directory is empty, however can also indicate an index corruption.
*/
public final class IndexNotFoundException extends FileNotFoundException {