You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/02/09 02:04:13 UTC
svn commit: r1068718 [4/21] - in /lucene/dev/branches/bulkpostings: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/
dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/maven/lucene/contrib/ant/
dev-tools/maven/lucene/contrib/db/bdb-je...
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymMap.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymMap.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymMap.java Wed Feb 9 01:03:49 2011
@@ -52,15 +52,17 @@ import java.util.TreeSet;
* high-frequency lookups of medium size synonym tables.
* <p>
* Example Usage:
- * <pre>
+ * <pre class="prettyprint">
* String[] words = new String[] { "hard", "woods", "forest", "wolfish", "xxxx"};
* SynonymMap map = new SynonymMap(new FileInputStream("samples/fulltext/wn_s.pl"));
* for (int i = 0; i < words.length; i++) {
* String[] synonyms = map.getSynonyms(words[i]);
* System.out.println(words[i] + ":" + java.util.Arrays.asList(synonyms).toString());
* }
- *
+ * </pre>
+ * <b/>
* Example output:
+ * <pre class="prettyprint">
* hard:[arduous, backbreaking, difficult, fermented, firmly, grueling, gruelling, heavily, heavy, intemperately, knockout, laborious, punishing, severe, severely, strong, toilsome, tough]
* woods:[forest, wood]
* forest:[afforest, timber, timberland, wood, woodland, woods]
@@ -161,7 +163,7 @@ public class SynonymMap {
return word.toLowerCase();
}
- private static boolean isValid(String str) {
+ protected boolean isValid(String str) {
for (int i=str.length(); --i >= 0; ) {
if (!Character.isLetter(str.charAt(i))) return false;
}
@@ -395,4 +397,4 @@ public class SynonymMap {
}
}
-}
\ No newline at end of file
+}
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/NumericRangeFilterBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/NumericRangeFilterBuilder.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/NumericRangeFilterBuilder.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/NumericRangeFilterBuilder.java Wed Feb 9 01:03:49 2011
@@ -154,7 +154,6 @@ public class NumericRangeFilterBuilder i
}
static class NoMatchFilter extends Filter {
- private static final long serialVersionUID = 1L;
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java Wed Feb 9 01:03:49 2011
@@ -73,7 +73,7 @@ public class TestParser extends LuceneTe
d.close();
writer.close();
reader=IndexReader.open(dir, true);
- searcher=new IndexSearcher(reader);
+ searcher=newSearcher(reader);
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java Wed Feb 9 01:03:49 2011
@@ -84,8 +84,6 @@ import org.apache.lucene.analysis.tokena
* href="../search/NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
* parameter as well as how numeric fields work under the hood.</p>
*
- * @lucene.experimental
- *
* @since 2.9
*/
public final class NumericTokenStream extends TokenStream {
@@ -137,7 +135,7 @@ public final class NumericTokenStream ex
}
}
- /** Implementatation of {@link NumericTermAttribute}.
+ /** Implementation of {@link NumericTermAttribute}.
* @lucene.internal
* @since 4.0
*/
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/package.html?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/package.html (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/package.html Wed Feb 9 01:03:49 2011
@@ -305,7 +305,7 @@ with the TokenStream.
<li>
Attribute instances are reused for all tokens of a document. Thus, a TokenStream/-Filter needs to update
the appropriate Attribute(s) in incrementToken(). The consumer, commonly the Lucene indexer, consumes the data in the
-Attributes and then calls incrementToken() again until it retuns false, which indicates that the end of the stream
+Attributes and then calls incrementToken() again until it returns false, which indicates that the end of the stream
was reached. This means that in each call of incrementToken() a TokenStream/-Filter can safely overwrite the data in
the Attribute instances.
</li>
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java Wed Feb 9 01:03:49 2011
@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.token
* limitations under the License.
*/
-import java.io.Serializable;
import java.nio.CharBuffer;
import org.apache.lucene.util.ArrayUtil;
@@ -30,7 +29,7 @@ import org.apache.lucene.util.UnicodeUti
/**
* The term text of a Token.
*/
-public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttribute, TermToBytesRefAttribute, Cloneable, Serializable {
+public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttribute, TermToBytesRefAttribute, Cloneable {
private static int MIN_BUFFER_SIZE = 10;
private char[] termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)];
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java Wed Feb 9 01:03:49 2011
@@ -17,8 +17,6 @@ package org.apache.lucene.analysis.token
* limitations under the License.
*/
-import java.io.Serializable;
-
import org.apache.lucene.util.AttributeImpl;
/**
@@ -26,7 +24,7 @@ import org.apache.lucene.util.AttributeI
* eg from one TokenFilter to another one.
* @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
*/
-public class FlagsAttributeImpl extends AttributeImpl implements FlagsAttribute, Cloneable, Serializable {
+public class FlagsAttributeImpl extends AttributeImpl implements FlagsAttribute, Cloneable {
private int flags = 0;
/**
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java Wed Feb 9 01:03:49 2011
@@ -17,14 +17,12 @@ package org.apache.lucene.analysis.token
* limitations under the License.
*/
-import java.io.Serializable;
-
import org.apache.lucene.util.AttributeImpl;
/**
* The start and end character offset of a Token.
*/
-public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribute, Cloneable, Serializable {
+public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribute, Cloneable {
private int startOffset;
private int endOffset;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java Wed Feb 9 01:03:49 2011
@@ -17,15 +17,13 @@ package org.apache.lucene.analysis.token
* limitations under the License.
*/
-import java.io.Serializable;
-
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.AttributeImpl;
/**
* The payload of a Token. See also {@link Payload}.
*/
-public class PayloadAttributeImpl extends AttributeImpl implements PayloadAttribute, Cloneable, Serializable {
+public class PayloadAttributeImpl extends AttributeImpl implements PayloadAttribute, Cloneable {
private Payload payload;
/**
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java Wed Feb 9 01:03:49 2011
@@ -17,8 +17,6 @@ package org.apache.lucene.analysis.token
* limitations under the License.
*/
-import java.io.Serializable;
-
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.AttributeImpl;
@@ -46,7 +44,7 @@ import org.apache.lucene.util.AttributeI
*
* </ul>
*/
-public class PositionIncrementAttributeImpl extends AttributeImpl implements PositionIncrementAttribute, Cloneable, Serializable {
+public class PositionIncrementAttributeImpl extends AttributeImpl implements PositionIncrementAttribute, Cloneable {
private int positionIncrement = 1;
/** Set the position increment. The default value is one.
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TypeAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TypeAttributeImpl.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TypeAttributeImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/TypeAttributeImpl.java Wed Feb 9 01:03:49 2011
@@ -17,14 +17,12 @@ package org.apache.lucene.analysis.token
* limitations under the License.
*/
-import java.io.Serializable;
-
import org.apache.lucene.util.AttributeImpl;
/**
* A Token's lexical type. The Default value is "word".
*/
-public class TypeAttributeImpl extends AttributeImpl implements TypeAttribute, Cloneable, Serializable {
+public class TypeAttributeImpl extends AttributeImpl implements TypeAttribute, Cloneable {
private String type;
public TypeAttributeImpl() {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/AbstractField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/AbstractField.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/AbstractField.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/AbstractField.java Wed Feb 9 01:03:49 2011
@@ -77,7 +77,7 @@ public abstract class AbstractField impl
* used to compute the norm factor for the field. By
* default, in the {@link
* org.apache.lucene.search.Similarity#computeNorm(String,
- * FieldInvertState)} method, the boost value is multipled
+ * FieldInvertState)} method, the boost value is multiplied
* by the {@link
* org.apache.lucene.search.Similarity#lengthNorm(String,
* int)} and then
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/DateTools.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/DateTools.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/DateTools.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/DateTools.java Wed Feb 9 01:03:49 2011
@@ -47,28 +47,37 @@ import org.apache.lucene.util.NumericUti
*/
public class DateTools {
- private final static TimeZone GMT = TimeZone.getTimeZone("GMT");
+ private static final class DateFormats {
+ final static TimeZone GMT = TimeZone.getTimeZone("GMT");
- private static final SimpleDateFormat YEAR_FORMAT = new SimpleDateFormat("yyyy", Locale.US);
- private static final SimpleDateFormat MONTH_FORMAT = new SimpleDateFormat("yyyyMM", Locale.US);
- private static final SimpleDateFormat DAY_FORMAT = new SimpleDateFormat("yyyyMMdd", Locale.US);
- private static final SimpleDateFormat HOUR_FORMAT = new SimpleDateFormat("yyyyMMddHH", Locale.US);
- private static final SimpleDateFormat MINUTE_FORMAT = new SimpleDateFormat("yyyyMMddHHmm", Locale.US);
- private static final SimpleDateFormat SECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US);
- private static final SimpleDateFormat MILLISECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.US);
- static {
- // times need to be normalized so the value doesn't depend on the
- // location the index is created/used:
- YEAR_FORMAT.setTimeZone(GMT);
- MONTH_FORMAT.setTimeZone(GMT);
- DAY_FORMAT.setTimeZone(GMT);
- HOUR_FORMAT.setTimeZone(GMT);
- MINUTE_FORMAT.setTimeZone(GMT);
- SECOND_FORMAT.setTimeZone(GMT);
- MILLISECOND_FORMAT.setTimeZone(GMT);
+ final SimpleDateFormat YEAR_FORMAT = new SimpleDateFormat("yyyy", Locale.US);
+ final SimpleDateFormat MONTH_FORMAT = new SimpleDateFormat("yyyyMM", Locale.US);
+ final SimpleDateFormat DAY_FORMAT = new SimpleDateFormat("yyyyMMdd", Locale.US);
+ final SimpleDateFormat HOUR_FORMAT = new SimpleDateFormat("yyyyMMddHH", Locale.US);
+ final SimpleDateFormat MINUTE_FORMAT = new SimpleDateFormat("yyyyMMddHHmm", Locale.US);
+ final SimpleDateFormat SECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US);
+ final SimpleDateFormat MILLISECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.US);
+ {
+ // times need to be normalized so the value doesn't depend on the
+ // location the index is created/used:
+ YEAR_FORMAT.setTimeZone(GMT);
+ MONTH_FORMAT.setTimeZone(GMT);
+ DAY_FORMAT.setTimeZone(GMT);
+ HOUR_FORMAT.setTimeZone(GMT);
+ MINUTE_FORMAT.setTimeZone(GMT);
+ SECOND_FORMAT.setTimeZone(GMT);
+ MILLISECOND_FORMAT.setTimeZone(GMT);
+ }
+
+ final Calendar calInstance = Calendar.getInstance(GMT, Locale.US);
}
-
- private static final Calendar calInstance = Calendar.getInstance(GMT);
+
+ private static final ThreadLocal<DateFormats> FORMATS = new ThreadLocal<DateFormats>() {
+ @Override
+ protected DateFormats initialValue() {
+ return new DateFormats();
+ }
+ };
// cannot create, the class has static methods only
private DateTools() {}
@@ -82,7 +91,7 @@ public class DateTools {
* @return a string in format <code>yyyyMMddHHmmssSSS</code> or shorter,
* depending on <code>resolution</code>; using GMT as timezone
*/
- public static synchronized String dateToString(Date date, Resolution resolution) {
+ public static String dateToString(Date date, Resolution resolution) {
return timeToString(date.getTime(), resolution);
}
@@ -95,24 +104,20 @@ public class DateTools {
* @return a string in format <code>yyyyMMddHHmmssSSS</code> or shorter,
* depending on <code>resolution</code>; using GMT as timezone
*/
- public static synchronized String timeToString(long time, Resolution resolution) {
- calInstance.setTimeInMillis(round(time, resolution));
- Date date = calInstance.getTime();
-
- if (resolution == Resolution.YEAR) {
- return YEAR_FORMAT.format(date);
- } else if (resolution == Resolution.MONTH) {
- return MONTH_FORMAT.format(date);
- } else if (resolution == Resolution.DAY) {
- return DAY_FORMAT.format(date);
- } else if (resolution == Resolution.HOUR) {
- return HOUR_FORMAT.format(date);
- } else if (resolution == Resolution.MINUTE) {
- return MINUTE_FORMAT.format(date);
- } else if (resolution == Resolution.SECOND) {
- return SECOND_FORMAT.format(date);
- } else if (resolution == Resolution.MILLISECOND) {
- return MILLISECOND_FORMAT.format(date);
+ public static String timeToString(long time, Resolution resolution) {
+ final DateFormats formats = FORMATS.get();
+
+ formats.calInstance.setTimeInMillis(round(time, resolution));
+ final Date date = formats.calInstance.getTime();
+
+ switch (resolution) {
+ case YEAR: return formats.YEAR_FORMAT.format(date);
+ case MONTH:return formats.MONTH_FORMAT.format(date);
+ case DAY: return formats.DAY_FORMAT.format(date);
+ case HOUR: return formats.HOUR_FORMAT.format(date);
+ case MINUTE: return formats.MINUTE_FORMAT.format(date);
+ case SECOND: return formats.SECOND_FORMAT.format(date);
+ case MILLISECOND: return formats.MILLISECOND_FORMAT.format(date);
}
throw new IllegalArgumentException("unknown resolution " + resolution);
@@ -128,7 +133,7 @@ public class DateTools {
* @throws ParseException if <code>dateString</code> is not in the
* expected format
*/
- public static synchronized long stringToTime(String dateString) throws ParseException {
+ public static long stringToTime(String dateString) throws ParseException {
return stringToDate(dateString).getTime();
}
@@ -142,21 +147,23 @@ public class DateTools {
* @throws ParseException if <code>dateString</code> is not in the
* expected format
*/
- public static synchronized Date stringToDate(String dateString) throws ParseException {
+ public static Date stringToDate(String dateString) throws ParseException {
+ final DateFormats formats = FORMATS.get();
+
if (dateString.length() == 4) {
- return YEAR_FORMAT.parse(dateString);
+ return formats.YEAR_FORMAT.parse(dateString);
} else if (dateString.length() == 6) {
- return MONTH_FORMAT.parse(dateString);
+ return formats.MONTH_FORMAT.parse(dateString);
} else if (dateString.length() == 8) {
- return DAY_FORMAT.parse(dateString);
+ return formats.DAY_FORMAT.parse(dateString);
} else if (dateString.length() == 10) {
- return HOUR_FORMAT.parse(dateString);
+ return formats.HOUR_FORMAT.parse(dateString);
} else if (dateString.length() == 12) {
- return MINUTE_FORMAT.parse(dateString);
+ return formats.MINUTE_FORMAT.parse(dateString);
} else if (dateString.length() == 14) {
- return SECOND_FORMAT.parse(dateString);
+ return formats.SECOND_FORMAT.parse(dateString);
} else if (dateString.length() == 17) {
- return MILLISECOND_FORMAT.parse(dateString);
+ return formats.MILLISECOND_FORMAT.parse(dateString);
}
throw new ParseException("Input is not valid date string: " + dateString, 0);
}
@@ -170,7 +177,7 @@ public class DateTools {
* @return the date with all values more precise than <code>resolution</code>
* set to 0 or 1
*/
- public static synchronized Date round(Date date, Resolution resolution) {
+ public static Date round(Date date, Resolution resolution) {
return new Date(round(date.getTime(), resolution));
}
@@ -184,67 +191,63 @@ public class DateTools {
* @return the date with all values more precise than <code>resolution</code>
* set to 0 or 1, expressed as milliseconds since January 1, 1970, 00:00:00 GMT
*/
- public static synchronized long round(long time, Resolution resolution) {
+ public static long round(long time, Resolution resolution) {
+ final Calendar calInstance = FORMATS.get().calInstance;
calInstance.setTimeInMillis(time);
- if (resolution == Resolution.YEAR) {
- calInstance.set(Calendar.MONTH, 0);
- calInstance.set(Calendar.DAY_OF_MONTH, 1);
- calInstance.set(Calendar.HOUR_OF_DAY, 0);
- calInstance.set(Calendar.MINUTE, 0);
- calInstance.set(Calendar.SECOND, 0);
- calInstance.set(Calendar.MILLISECOND, 0);
- } else if (resolution == Resolution.MONTH) {
- calInstance.set(Calendar.DAY_OF_MONTH, 1);
- calInstance.set(Calendar.HOUR_OF_DAY, 0);
- calInstance.set(Calendar.MINUTE, 0);
- calInstance.set(Calendar.SECOND, 0);
- calInstance.set(Calendar.MILLISECOND, 0);
- } else if (resolution == Resolution.DAY) {
- calInstance.set(Calendar.HOUR_OF_DAY, 0);
- calInstance.set(Calendar.MINUTE, 0);
- calInstance.set(Calendar.SECOND, 0);
- calInstance.set(Calendar.MILLISECOND, 0);
- } else if (resolution == Resolution.HOUR) {
- calInstance.set(Calendar.MINUTE, 0);
- calInstance.set(Calendar.SECOND, 0);
- calInstance.set(Calendar.MILLISECOND, 0);
- } else if (resolution == Resolution.MINUTE) {
- calInstance.set(Calendar.SECOND, 0);
- calInstance.set(Calendar.MILLISECOND, 0);
- } else if (resolution == Resolution.SECOND) {
- calInstance.set(Calendar.MILLISECOND, 0);
- } else if (resolution == Resolution.MILLISECOND) {
- // don't cut off anything
- } else {
- throw new IllegalArgumentException("unknown resolution " + resolution);
+ switch (resolution) {
+ case YEAR:
+ calInstance.set(Calendar.MONTH, 0);
+ calInstance.set(Calendar.DAY_OF_MONTH, 1);
+ calInstance.set(Calendar.HOUR_OF_DAY, 0);
+ calInstance.set(Calendar.MINUTE, 0);
+ calInstance.set(Calendar.SECOND, 0);
+ calInstance.set(Calendar.MILLISECOND, 0);
+ break;
+ case MONTH:
+ calInstance.set(Calendar.DAY_OF_MONTH, 1);
+ calInstance.set(Calendar.HOUR_OF_DAY, 0);
+ calInstance.set(Calendar.MINUTE, 0);
+ calInstance.set(Calendar.SECOND, 0);
+ calInstance.set(Calendar.MILLISECOND, 0);
+ break;
+ case DAY:
+ calInstance.set(Calendar.HOUR_OF_DAY, 0);
+ calInstance.set(Calendar.MINUTE, 0);
+ calInstance.set(Calendar.SECOND, 0);
+ calInstance.set(Calendar.MILLISECOND, 0);
+ break;
+ case HOUR:
+ calInstance.set(Calendar.MINUTE, 0);
+ calInstance.set(Calendar.SECOND, 0);
+ calInstance.set(Calendar.MILLISECOND, 0);
+ break;
+ case MINUTE:
+ calInstance.set(Calendar.SECOND, 0);
+ calInstance.set(Calendar.MILLISECOND, 0);
+ break;
+ case SECOND:
+ calInstance.set(Calendar.MILLISECOND, 0);
+ break;
+ case MILLISECOND:
+ // don't cut off anything
+ break;
+ default:
+ throw new IllegalArgumentException("unknown resolution " + resolution);
}
return calInstance.getTimeInMillis();
}
/** Specifies the time granularity. */
- public static class Resolution {
+ public static enum Resolution {
- public static final Resolution YEAR = new Resolution("year");
- public static final Resolution MONTH = new Resolution("month");
- public static final Resolution DAY = new Resolution("day");
- public static final Resolution HOUR = new Resolution("hour");
- public static final Resolution MINUTE = new Resolution("minute");
- public static final Resolution SECOND = new Resolution("second");
- public static final Resolution MILLISECOND = new Resolution("millisecond");
+ YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND;
- private String resolution;
-
- private Resolution() {
- }
-
- private Resolution(String resolution) {
- this.resolution = resolution;
- }
-
+ /** this method returns the name of the resolution
+ * in lowercase (for backwards compatibility) */
@Override
public String toString() {
- return resolution;
+ return super.toString().toLowerCase(Locale.ENGLISH);
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Document.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Document.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Document.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Document.java Wed Feb 9 01:03:49 2011
@@ -35,7 +35,7 @@ import org.apache.lucene.index.IndexRead
* ScoreDoc#doc} or {@link IndexReader#document(int)}.
*/
-public final class Document implements java.io.Serializable {
+public final class Document {
List<Fieldable> fields = new ArrayList<Fieldable>();
private float boost = 1.0f;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Field.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Field.java Wed Feb 9 01:03:49 2011
@@ -18,7 +18,6 @@ package org.apache.lucene.document;
*/
import java.io.Reader;
-import java.io.Serializable;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexWriter;
@@ -32,7 +31,7 @@ import org.apache.lucene.util.StringHelp
index, so that they may be returned with hits on the document.
*/
-public final class Field extends AbstractField implements Fieldable, Serializable {
+public final class Field extends AbstractField implements Fieldable {
/** Specifies whether and how a field should be stored. */
public static enum Store {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/FieldSelector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/FieldSelector.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/FieldSelector.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/FieldSelector.java Wed Feb 9 01:03:49 2011
@@ -1,6 +1,5 @@
package org.apache.lucene.document;
-import java.io.Serializable;
/**
* Copyright 2004 The Apache Software Foundation
*
@@ -22,7 +21,7 @@ import java.io.Serializable;
* what Fields get loaded on a {@link Document} by {@link org.apache.lucene.index.IndexReader#document(int,org.apache.lucene.document.FieldSelector)}
*
**/
-public interface FieldSelector extends Serializable {
+public interface FieldSelector {
/**
*
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Fieldable.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Fieldable.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Fieldable.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Fieldable.java Wed Feb 9 01:03:49 2011
@@ -22,7 +22,6 @@ import org.apache.lucene.search.PhraseQu
import org.apache.lucene.search.spans.SpanQuery; // for javadocs
import java.io.Reader;
-import java.io.Serializable;
/**
* Synonymous with {@link Field}.
@@ -34,7 +33,7 @@ import java.io.Serializable;
* </p>
*
**/
-public interface Fieldable extends Serializable {
+public interface Fieldable {
/** Sets the boost factor hits on this field. This value will be
* multiplied into the score of all hits on this this field of this
* document.
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java Wed Feb 9 01:03:49 2011
@@ -134,8 +134,6 @@ import org.apache.lucene.search.FieldCac
* values are returned as {@link String}s (according to
* <code>toString(value)</code> of the used data type).
*
- * @lucene.experimental
- *
* @since 2.9
*/
public final class NumericField extends AbstractField {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java Wed Feb 9 01:03:49 2011
@@ -145,7 +145,7 @@ public class ConcurrentMergeScheduler ex
/**
* Called whenever the running merges have changed, to pause & unpause
* threads. This method sorts the merge threads by their merge size in
- * descending order and then pauses/unpauses threads from first to lsat --
+ * descending order and then pauses/unpauses threads from first to last --
* that way, smaller merges are guaranteed to run before larger ones.
*/
protected synchronized void updateMergeThreads() {
@@ -308,10 +308,31 @@ public class ConcurrentMergeScheduler ex
// pending merges, until it's empty:
while (true) {
+ synchronized(this) {
+ long startStallTime = 0;
+ while (mergeThreadCount() >= 1+maxMergeCount) {
+ startStallTime = System.currentTimeMillis();
+ if (verbose()) {
+ message(" too many merges; stalling...");
+ }
+ try {
+ wait();
+ } catch (InterruptedException ie) {
+ throw new ThreadInterruptedException(ie);
+ }
+ }
+
+ if (verbose()) {
+ if (startStallTime != 0) {
+ message(" stalled for " + (System.currentTimeMillis()-startStallTime) + " msec");
+ }
+ }
+ }
+
+
// TODO: we could be careful about which merges to do in
// the BG (eg maybe the "biggest" ones) vs FG, which
// merges to do first (the easiest ones?), etc.
-
MergePolicy.OneMerge merge = writer.getNextMerge();
if (merge == null) {
if (verbose())
@@ -326,32 +347,11 @@ public class ConcurrentMergeScheduler ex
boolean success = false;
try {
synchronized(this) {
- final MergeThread merger;
- long startStallTime = 0;
- while (mergeThreadCount() >= maxMergeCount) {
- startStallTime = System.currentTimeMillis();
- if (verbose()) {
- message(" too many merges; stalling...");
- }
- try {
- wait();
- } catch (InterruptedException ie) {
- throw new ThreadInterruptedException(ie);
- }
- }
-
- if (verbose()) {
- if (startStallTime != 0) {
- message(" stalled for " + (System.currentTimeMillis()-startStallTime) + " msec");
- }
- message(" consider merge " + merge.segString(dir));
- }
-
- assert mergeThreadCount() < maxMergeCount;
+ message(" consider merge " + merge.segString(dir));
// OK to spawn a new merge thread to handle this
// merge:
- merger = getMergeThread(writer, merge);
+ final MergeThread merger = getMergeThread(writer, merge);
mergeThreads.add(merger);
if (verbose()) {
message(" launch new thread [" + merger.getName() + "]");
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DirectoryReader.java Wed Feb 9 01:03:49 2011
@@ -27,6 +27,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
@@ -36,8 +37,7 @@ import org.apache.lucene.store.LockObtai
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
-
-import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
+import org.apache.lucene.util.MapBackedSet;
/**
* An IndexReader which reads indexes with multiple segments.
@@ -70,6 +70,8 @@ class DirectoryReader extends IndexReade
// opened on a past IndexCommit:
private long maxIndexVersion;
+ private final boolean applyAllDeletes;
+
// static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly,
// final int termInfosIndexDivisor) throws CorruptIndexException, IOException {
// return open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor, null);
@@ -106,6 +108,8 @@ class DirectoryReader extends IndexReade
} else {
this.codecs = codecs;
}
+ readerFinishedListeners = new MapBackedSet<ReaderFinishedListener>(new ConcurrentHashMap<ReaderFinishedListener,Boolean>());
+ applyAllDeletes = false;
// To reduce the chance of hitting FileNotFound
// (and having to retry), we open segments in
@@ -117,6 +121,7 @@ class DirectoryReader extends IndexReade
boolean success = false;
try {
readers[i] = SegmentReader.get(readOnly, sis.info(i), termInfosIndexDivisor);
+ readers[i].readerFinishedListeners = readerFinishedListeners;
success = true;
} finally {
if (!success) {
@@ -136,9 +141,11 @@ class DirectoryReader extends IndexReade
}
// Used by near real-time search
- DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, CodecProvider codecs) throws IOException {
+ DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, CodecProvider codecs, boolean applyAllDeletes) throws IOException {
this.directory = writer.getDirectory();
this.readOnly = true;
+ this.applyAllDeletes = applyAllDeletes; // saved for reopen
+
segmentInfos = (SegmentInfos) infos.clone();// make sure we clone otherwise we share mutable state with IW
this.termInfosIndexDivisor = termInfosIndexDivisor;
if (codecs == null) {
@@ -146,6 +153,7 @@ class DirectoryReader extends IndexReade
} else {
this.codecs = codecs;
}
+ readerFinishedListeners = writer.getReaderFinishedListeners();
// IndexWriter synchronizes externally before calling
// us, which ensures infos will not change; so there's
@@ -160,6 +168,7 @@ class DirectoryReader extends IndexReade
final SegmentInfo info = infos.info(i);
assert info.dir == dir;
readers[i] = writer.readerPool.getReadOnlyClone(info, true, termInfosIndexDivisor);
+ readers[i].readerFinishedListeners = readerFinishedListeners;
success = true;
} finally {
if (!success) {
@@ -182,11 +191,15 @@ class DirectoryReader extends IndexReade
/** This constructor is only used for {@link #reopen()} */
DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts,
- boolean readOnly, boolean doClone, int termInfosIndexDivisor, CodecProvider codecs) throws IOException {
+ boolean readOnly, boolean doClone, int termInfosIndexDivisor, CodecProvider codecs,
+ Collection<ReaderFinishedListener> readerFinishedListeners) throws IOException {
this.directory = directory;
this.readOnly = readOnly;
this.segmentInfos = infos;
this.termInfosIndexDivisor = termInfosIndexDivisor;
+ this.readerFinishedListeners = readerFinishedListeners;
+ applyAllDeletes = false;
+
if (codecs == null) {
this.codecs = CodecProvider.getDefault();
} else {
@@ -232,8 +245,10 @@ class DirectoryReader extends IndexReade
// this is a new reader; in case we hit an exception we can close it safely
newReader = SegmentReader.get(readOnly, infos.info(i), termInfosIndexDivisor);
+ newReader.readerFinishedListeners = readerFinishedListeners;
} else {
newReader = newReaders[i].reopenSegment(infos.info(i), doClone, readOnly);
+ assert newReader.readerFinishedListeners == readerFinishedListeners;
}
if (newReader == newReaders[i]) {
// this reader will be shared between the old and the new one,
@@ -357,6 +372,7 @@ class DirectoryReader extends IndexReade
writeLock = null;
hasChanges = false;
}
+ assert newReader.readerFinishedListeners != null;
return newReader;
}
@@ -391,7 +407,9 @@ class DirectoryReader extends IndexReade
// TODO: right now we *always* make a new reader; in
// the future we could have write make some effort to
// detect that no changes have occurred
- return writer.getReader();
+ IndexReader reader = writer.getReader(applyAllDeletes);
+ reader.readerFinishedListeners = readerFinishedListeners;
+ return reader;
}
private IndexReader doReopen(final boolean openReadOnly, IndexCommit commit) throws CorruptIndexException, IOException {
@@ -458,7 +476,7 @@ class DirectoryReader extends IndexReade
private synchronized DirectoryReader doReopen(SegmentInfos infos, boolean doClone, boolean openReadOnly) throws CorruptIndexException, IOException {
DirectoryReader reader;
- reader = new DirectoryReader(directory, infos, subReaders, starts, openReadOnly, doClone, termInfosIndexDivisor, codecs);
+ reader = new DirectoryReader(directory, infos, subReaders, starts, openReadOnly, doClone, termInfosIndexDivisor, codecs, readerFinishedListeners);
return reader;
}
@@ -705,12 +723,16 @@ class DirectoryReader extends IndexReade
// case we have to roll back:
startCommit();
+ final SegmentInfos rollbackSegmentInfos = new SegmentInfos();
+ rollbackSegmentInfos.addAll(segmentInfos);
+
boolean success = false;
try {
for (int i = 0; i < subReaders.length; i++)
subReaders[i].commit();
- // Remove segments that contain only 100% deleted docs:
+ // Remove segments that contain only 100% deleted
+ // docs:
segmentInfos.pruneDeletedSegments();
// Sync all files we just wrote
@@ -732,6 +754,10 @@ class DirectoryReader extends IndexReade
// partially written .del files, etc, are
// removed):
deleter.refresh();
+
+ // Restore all SegmentInfos (in case we pruned some)
+ segmentInfos.clear();
+ segmentInfos.addAll(rollbackSegmentInfos);
}
}
@@ -808,11 +834,6 @@ class DirectoryReader extends IndexReade
}
}
- // NOTE: only needed in case someone had asked for
- // FieldCache for top-level reader (which is generally
- // not a good idea):
- FieldCache.DEFAULT.purge(this);
-
if (writer != null) {
// Since we just closed, writer may now be able to
// delete unused files:
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Wed Feb 9 01:03:49 2011
@@ -35,9 +35,11 @@ import org.apache.lucene.store.AlreadyCl
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMFile;
import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BitVector;
+import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.RecyclingByteBlockAllocator;
import org.apache.lucene.util.ThreadInterruptedException;
-import org.apache.lucene.util.RamUsageEstimator;
+
import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
@@ -133,8 +135,9 @@ final class DocumentsWriter {
// this, they wait for others to finish first
private final int maxThreadStates;
+ // TODO: cutover to BytesRefHash
// Deletes for our still-in-RAM (to be flushed next) segment
- private SegmentDeletes pendingDeletes = new SegmentDeletes();
+ private BufferedDeletes pendingDeletes = new BufferedDeletes(false);
static class DocState {
DocumentsWriter docWriter;
@@ -278,16 +281,16 @@ final class DocumentsWriter {
private boolean closed;
private final FieldInfos fieldInfos;
- private final BufferedDeletes bufferedDeletes;
+ private final BufferedDeletesStream bufferedDeletesStream;
private final IndexWriter.FlushControl flushControl;
- DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain, int maxThreadStates, FieldInfos fieldInfos, BufferedDeletes bufferedDeletes) throws IOException {
+ DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain, int maxThreadStates, FieldInfos fieldInfos, BufferedDeletesStream bufferedDeletesStream) throws IOException {
this.directory = directory;
this.writer = writer;
this.similarityProvider = writer.getConfig().getSimilarityProvider();
this.maxThreadStates = maxThreadStates;
this.fieldInfos = fieldInfos;
- this.bufferedDeletes = bufferedDeletes;
+ this.bufferedDeletesStream = bufferedDeletesStream;
flushControl = writer.flushControl;
consumer = indexingChain.getChain(this);
@@ -336,6 +339,9 @@ final class DocumentsWriter {
return doFlush;
}
+ // TODO: we could check w/ FreqProxTermsWriter: if the
+ // term doesn't exist, don't bother buffering into the
+ // per-DWPT map (but still must go into the global map)
boolean deleteTerm(Term term, boolean skipWait) {
final boolean doFlush = flushControl.waitUpdate(0, 1, skipWait);
synchronized(this) {
@@ -501,23 +507,26 @@ final class DocumentsWriter {
}
// for testing
- public SegmentDeletes getPendingDeletes() {
+ public BufferedDeletes getPendingDeletes() {
return pendingDeletes;
}
private void pushDeletes(SegmentInfo newSegment, SegmentInfos segmentInfos) {
// Lock order: DW -> BD
+ final long delGen = bufferedDeletesStream.getNextGen();
if (pendingDeletes.any()) {
- if (newSegment != null) {
+ if (segmentInfos.size() > 0 || newSegment != null) {
+ final FrozenBufferedDeletes packet = new FrozenBufferedDeletes(pendingDeletes, delGen);
if (infoStream != null) {
- message("flush: push buffered deletes to newSegment");
+ message("flush: push buffered deletes");
}
- bufferedDeletes.pushDeletes(pendingDeletes, newSegment);
- } else if (segmentInfos.size() > 0) {
+ bufferedDeletesStream.push(packet);
if (infoStream != null) {
- message("flush: push buffered deletes to previously flushed segment " + segmentInfos.lastElement());
+ message("flush: delGen=" + packet.gen);
+ }
+ if (newSegment != null) {
+ newSegment.setBufferedDeletesGen(packet.gen);
}
- bufferedDeletes.pushDeletes(pendingDeletes, segmentInfos.lastElement(), true);
} else {
if (infoStream != null) {
message("flush: drop buffered deletes: no segments");
@@ -526,7 +535,9 @@ final class DocumentsWriter {
// there are no segments, the deletions cannot
// affect anything.
}
- pendingDeletes = new SegmentDeletes();
+ pendingDeletes.clear();
+ } else if (newSegment != null) {
+ newSegment.setBufferedDeletesGen(delGen);
}
}
@@ -577,7 +588,19 @@ final class DocumentsWriter {
final SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segment, fieldInfos,
numDocs, writer.getConfig().getTermIndexInterval(),
- SegmentCodecs.build(fieldInfos, writer.codecs));
+ SegmentCodecs.build(fieldInfos, writer.codecs),
+ pendingDeletes);
+ // Apply delete-by-docID now (delete-by-docID only
+ // happens when an exception is hit processing that
+ // doc, eg if analyzer has some problem w/ the text):
+ if (pendingDeletes.docIDs.size() > 0) {
+ flushState.deletedDocs = new BitVector(numDocs);
+ for(int delDocID : pendingDeletes.docIDs) {
+ flushState.deletedDocs.set(delDocID);
+ }
+ pendingDeletes.bytesUsed.addAndGet(-pendingDeletes.docIDs.size() * BufferedDeletes.BYTES_PER_DEL_DOCID);
+ pendingDeletes.docIDs.clear();
+ }
newSegment = new SegmentInfo(segment, numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false);
@@ -589,10 +612,14 @@ final class DocumentsWriter {
double startMBUsed = bytesUsed()/1024./1024.;
consumer.flush(threads, flushState);
+
newSegment.setHasVectors(flushState.hasVectors);
if (infoStream != null) {
message("new segment has " + (flushState.hasVectors ? "vectors" : "no vectors"));
+ if (flushState.deletedDocs != null) {
+ message("new segment has " + flushState.deletedDocs.count() + " deleted docs");
+ }
message("flushedFiles=" + newSegment.files());
message("flushed codecs=" + newSegment.getSegmentCodecs());
}
@@ -613,6 +640,30 @@ final class DocumentsWriter {
newSegment.setUseCompoundFile(true);
}
+ // Must write deleted docs after the CFS so we don't
+ // slurp the del file into CFS:
+ if (flushState.deletedDocs != null) {
+ final int delCount = flushState.deletedDocs.count();
+ assert delCount > 0;
+ newSegment.setDelCount(delCount);
+ newSegment.advanceDelGen();
+ final String delFileName = newSegment.getDelFileName();
+ boolean success2 = false;
+ try {
+ flushState.deletedDocs.write(directory, delFileName);
+ success2 = true;
+ } finally {
+ if (!success2) {
+ try {
+ directory.deleteFile(delFileName);
+ } catch (Throwable t) {
+ // suppress this so we keep throwing the
+ // original exception
+ }
+ }
+ }
+ }
+
if (infoStream != null) {
message("flush: segment=" + newSegment);
final double newSegmentSizeNoStore = newSegment.sizeInBytes(false)/1024./1024.;
@@ -639,7 +690,6 @@ final class DocumentsWriter {
// Lock order: IW -> DW -> BD
pushDeletes(newSegment, segmentInfos);
-
if (infoStream != null) {
message("flush time " + (System.currentTimeMillis()-startTime) + " msec");
}
@@ -964,7 +1014,7 @@ final class DocumentsWriter {
final boolean doBalance;
final long deletesRAMUsed;
- deletesRAMUsed = bufferedDeletes.bytesUsed();
+ deletesRAMUsed = bufferedDeletesStream.bytesUsed();
synchronized(this) {
if (ramBufferSize == IndexWriterConfig.DISABLE_AUTO_FLUSH || bufferIsFull) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Wed Feb 9 01:03:49 2011
@@ -22,13 +22,14 @@ import org.apache.lucene.document.FieldS
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.MapBackedSet;
import java.io.IOException;
import java.util.Collection;
import java.util.Map;
import java.util.Comparator;
+import java.util.concurrent.ConcurrentHashMap;
/** A <code>FilterIndexReader</code> contains another IndexReader, which it
* uses as its basic source of data, possibly transforming the data along the
@@ -283,6 +284,7 @@ public class FilterIndexReader extends I
public FilterIndexReader(IndexReader in) {
super();
this.in = in;
+ readerFinishedListeners = new MapBackedSet<ReaderFinishedListener>(new ConcurrentHashMap<ReaderFinishedListener,Boolean>());
}
@Override
@@ -388,11 +390,6 @@ public class FilterIndexReader extends I
@Override
protected void doClose() throws IOException {
in.close();
-
- // NOTE: only needed in case someone had asked for
- // FieldCache for top-level reader (which is generally
- // not a good idea):
- FieldCache.DEFAULT.purge(this);
}
@@ -451,4 +448,16 @@ public class FilterIndexReader extends I
buffer.append(')');
return buffer.toString();
}
-}
\ No newline at end of file
+
+ @Override
+ public void addReaderFinishedListener(ReaderFinishedListener listener) {
+ super.addReaderFinishedListener(listener);
+ in.addReaderFinishedListener(listener);
+ }
+
+ @Override
+ public void removeReaderFinishedListener(ReaderFinishedListener listener) {
+ super.removeReaderFinishedListener(listener);
+ in.removeReaderFinishedListener(listener);
+ }
+}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java Wed Feb 9 01:03:49 2011
@@ -26,8 +26,9 @@ import java.util.Map;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.PostingsConsumer;
-import org.apache.lucene.index.codecs.TermsConsumer;
import org.apache.lucene.index.codecs.TermStats;
+import org.apache.lucene.index.codecs.TermsConsumer;
+import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CollectionUtil;
@@ -108,7 +109,7 @@ final class FreqProxTermsWriter extends
// If this field has postings then add them to the
// segment
- appendPostings(fields, consumer);
+ appendPostings(fieldName, state, fields, consumer);
for(int i=0;i<fields.length;i++) {
TermsHashPerField perField = fields[i].termsHashPerField;
@@ -133,7 +134,8 @@ final class FreqProxTermsWriter extends
/* Walk through all unique text tokens (Posting
* instances) found in this field and serialize them
* into a single RAM segment. */
- void appendPostings(FreqProxTermsWriterPerField[] fields,
+ void appendPostings(String fieldName, SegmentWriteState state,
+ FreqProxTermsWriterPerField[] fields,
FieldsConsumer consumer)
throws CorruptIndexException, IOException {
@@ -156,11 +158,20 @@ final class FreqProxTermsWriter extends
assert result;
}
+ final Term protoTerm = new Term(fieldName);
+
FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields];
final boolean currentFieldOmitTermFreqAndPositions = fields[0].fieldInfo.omitTermFreqAndPositions;
//System.out.println("flush terms field=" + fields[0].fieldInfo.name);
+ final Map<Term,Integer> segDeletes;
+ if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
+ segDeletes = state.segDeletes.terms;
+ } else {
+ segDeletes = null;
+ }
+
// TODO: really TermsHashPerField should take over most
// of this loop, including merge sort of terms from
// multiple threads and interacting with the
@@ -195,6 +206,18 @@ final class FreqProxTermsWriter extends
final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text);
+ final int delDocLimit;
+ if (segDeletes != null) {
+ final Integer docIDUpto = segDeletes.get(protoTerm.createTerm(text));
+ if (docIDUpto != null) {
+ delDocLimit = docIDUpto;
+ } else {
+ delDocLimit = 0;
+ }
+ } else {
+ delDocLimit = 0;
+ }
+
// Now termStates has numToMerge FieldMergeStates
// which all share the same term. Now we must
// interleave the docID streams.
@@ -214,7 +237,28 @@ final class FreqProxTermsWriter extends
assert minState.docID < flushedDocCount: "doc=" + minState.docID + " maxDoc=" + flushedDocCount;
+ // NOTE: we could check here if the docID was
+ // deleted, and skip it. However, this is somewhat
+ // dangerous because it can yield non-deterministic
+ // behavior since we may see the docID before we see
+ // the term that caused it to be deleted. This
+ // would mean some (but not all) of its postings may
+ // make it into the index, which'd alter the docFreq
+ // for those terms. We could fix this by doing two
+ // passes, ie first sweep marks all del docs, and
+ // 2nd sweep does the real flush, but I suspect
+ // that'd add too much time to flush.
+
postingsConsumer.startDoc(minState.docID, termDocFreq);
+ if (minState.docID < delDocLimit) {
+ // Mark it deleted. TODO: we could also skip
+ // writing its postings; this would be
+ // deterministic (just for this Term's docs).
+ if (state.deletedDocs == null) {
+ state.deletedDocs = new BitVector(state.numDocs);
+ }
+ state.deletedDocs.set(minState.docID);
+ }
final ByteSliceReader prox = minState.prox;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Wed Feb 9 01:03:49 2011
@@ -194,6 +194,7 @@ final class FreqProxTermsWriterPerField
return new FreqProxPostingsArray(size);
}
+ @Override
void copyTo(ParallelPostingsArray toArray, int numToCopy) {
assert toArray instanceof FreqProxPostingsArray;
FreqProxPostingsArray to = (FreqProxPostingsArray) toArray;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexFileNames.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexFileNames.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexFileNames.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexFileNames.java Wed Feb 9 01:03:49 2011
@@ -204,7 +204,7 @@ public final class IndexFileNames {
/**
* Returns true if the given filename ends with the given extension. One
- * should provide a <i>pure</i> extension, withouth '.'.
+ * should provide a <i>pure</i> extension, without '.'.
*/
public static boolean matchesExtension(String filename, String ext) {
// It doesn't make a difference whether we allocate a StringBuilder ourself
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexNotFoundException.java Wed Feb 9 01:03:49 2011
@@ -21,7 +21,7 @@ import java.io.FileNotFoundException;
/**
* Signals that no index was found in the Directory. Possibly because the
- * directory is empty, however can slso indicate an index corruption.
+ * directory is empty, however can also indicate an index corruption.
*/
public final class IndexNotFoundException extends FileNotFoundException {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexReader.java Wed Feb 9 01:03:49 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.search.FieldCache; // javadocs
import org.apache.lucene.search.Similarity;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
@@ -82,6 +83,62 @@ import java.util.concurrent.atomic.Atomi
public abstract class IndexReader implements Cloneable,Closeable {
/**
+ * A custom listener that's invoked when the IndexReader
+ * is finished.
+ *
+ * <p>For a SegmentReader, this listener is called only
+ * once all SegmentReaders sharing the same core are
+ * closed. At this point it is safe for apps to evict
+ * this reader from any caches keyed on {@link
+ * #getCoreCacheKey}. This is the same interface that
+ * {@link FieldCache} uses, internally, to evict
+ * entries.</p>
+ *
+ * <p>For other readers, this listener is called when they
+ * are closed.</p>
+ *
+ * @lucene.experimental
+ */
+ public static interface ReaderFinishedListener {
+ public void finished(IndexReader reader);
+ }
+
+ // Impls must set this if they may call add/removeReaderFinishedListener:
+ protected volatile Collection<ReaderFinishedListener> readerFinishedListeners;
+
+ /** Expert: adds a {@link ReaderFinishedListener}. The
+ * provided listener is also added to any sub-readers, if
+ * this is a composite reader. Also, any reader reopened
+ * or cloned from this one will also copy the listeners at
+ * the time of reopen.
+ *
+ * @lucene.experimental */
+ public void addReaderFinishedListener(ReaderFinishedListener listener) {
+ readerFinishedListeners.add(listener);
+ }
+
+ /** Expert: remove a previously added {@link ReaderFinishedListener}.
+ *
+ * @lucene.experimental */
+ public void removeReaderFinishedListener(ReaderFinishedListener listener) {
+ readerFinishedListeners.remove(listener);
+ }
+
+ protected void notifyReaderFinishedListeners() {
+ // Defensive (should never be null -- all impls must set
+ // this):
+ if (readerFinishedListeners != null) {
+ for(ReaderFinishedListener listener : readerFinishedListeners) {
+ listener.finished(this);
+ }
+ }
+ }
+
+ protected void readerFinished() {
+ notifyReaderFinishedListeners();
+ }
+
+ /**
* Constants describing field properties, for example used for
* {@link IndexReader#getFieldNames(FieldOption)}.
*/
@@ -195,6 +252,7 @@ public abstract class IndexReader implem
refCount.incrementAndGet();
}
}
+ readerFinished();
}
}
@@ -238,24 +296,26 @@ public abstract class IndexReader implem
/**
* Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}.
*
- *
* @param writer The IndexWriter to open from
+ * @param applyAllDeletes If true, all buffered deletes will
+ * be applied (made visible) in the returned reader. If
+ * false, the deletes are not applied but remain buffered
+ * (in IndexWriter) so that they will be applied in the
+ * future. Applying deletes can be costly, so if your app
+ * can tolerate deleted documents being returned you might
+ * gain some performance by passing false.
* @return The new IndexReader
* @throws CorruptIndexException
* @throws IOException if there is a low-level IO error
*
- * @see #reopen(IndexWriter)
+ * @see #reopen(IndexWriter,boolean)
*
* @lucene.experimental
*/
- public static IndexReader open(final IndexWriter writer) throws CorruptIndexException, IOException {
- return writer.getReader();
+ public static IndexReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
+ return writer.getReader(applyAllDeletes);
}
-
-
-
-
/** Expert: returns an IndexReader reading the index in the given
* {@link IndexCommit}. You should pass readOnly=true, since it
* gives much better concurrent performance, unless you
@@ -358,7 +418,10 @@ public abstract class IndexReader implem
* memory. By setting this to a value > 1 you can reduce
* memory usage, at the expense of higher latency when
* loading a TermInfo. The default value is 1. Set this
- * to -1 to skip loading the terms index entirely.
+ * to -1 to skip loading the terms index entirely. This is only useful in
+ * advanced situations when you will only .next() through all terms;
+ * attempts to seek will hit an exception.
+ *
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
@@ -557,18 +620,26 @@ public abstract class IndexReader implem
* if you attempt to reopen any of those readers, you'll
* hit an {@link AlreadyClosedException}.</p>
*
- * @lucene.experimental
- *
* @return IndexReader that covers entire index plus all
* changes made so far by this IndexWriter instance
*
+ * @param writer The IndexWriter to open from
+ * @param applyAllDeletes If true, all buffered deletes will
+ * be applied (made visible) in the returned reader. If
+ * false, the deletes are not applied but remain buffered
+ * (in IndexWriter) so that they will be applied in the
+ * future. Applying deletes can be costly, so if your app
+ * can tolerate deleted documents being returned you might
+ * gain some performance by passing false.
+ *
* @throws IOException
+ *
+ * @lucene.experimental
*/
- public IndexReader reopen(IndexWriter writer) throws CorruptIndexException, IOException {
- return writer.getReader();
+ public IndexReader reopen(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
+ return writer.getReader(applyAllDeletes);
}
-
/**
* Efficiently clones the IndexReader (sharing most
* internal state).
@@ -1198,11 +1269,13 @@ public abstract class IndexReader implem
/** Undeletes all documents currently marked as deleted in
* this index.
*
- * <p>NOTE: this is only a best-effort process. For
- * example, if all documents in a given segment were
- * deleted, Lucene now drops that segment from the index,
- * which means its documents will not be recovered by this
- * method.
+ * <p>NOTE: this method can only recover documents marked
+ * for deletion but not yet removed from the index; when
+ * and how Lucene removes deleted documents is an
+ * implementation detail, subject to change from release
+ * to release. However, you can use {@link
+ * #numDeletedDocs} on the current IndexReader instance to
+ * see how many documents will be un-deleted.
*
* @throws StaleReaderException if the index has changed
* since this reader was opened