You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/08/23 16:07:19 UTC
svn commit: r1160700 [8/22] - in /lucene/dev/branches/flexscoring: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/
dev-tools/idea/lucene/contrib/demo/
dev-tools/idea/lucene/contrib/highlighter/ dev-tools/idea/lucene/contrib/q...
Copied: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java (from r1160237, lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java?p2=lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java&r1=1160237&r2=1160700&rev=1160700&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java Tue Aug 23 14:06:58 2011
@@ -18,7 +18,7 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.search.Similarity.ExactDocScorer;
+import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
import org.apache.lucene.util.ArrayUtil;
import java.io.IOException;
import java.util.Comparator;
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCache.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCache.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCache.java Tue Aug 23 14:06:58 2011
@@ -17,6 +17,7 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.cache.EntryCreator;
@@ -654,6 +655,18 @@ public interface FieldCache {
throws IOException;
/**
+ * Checks the internal cache for an appropriate entry, and if none is found, reads the term values
+ * in <code>field</code> and returns a {@link DocTermOrds} instance, providing a method to retrieve
+ * the terms (as ords) per document.
+ *
+ * @param reader Used to build a {@link DocTermOrds} instance
+ * @param field Which field contains the strings.
+ * @return a {@link DocTermOrds} instance
+ * @throws IOException If any error occurs.
+ */
+ public DocTermOrds getDocTermOrds(IndexReader reader, String field) throws IOException;
+
+ /**
* EXPERT: A unique Identifier/Description for each item in the FieldCache.
* Can be useful for logging/debugging.
* @lucene.experimental
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java Tue Aug 23 14:06:58 2011
@@ -17,32 +17,16 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.WeakHashMap;
-
+import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.cache.ByteValuesCreator;
-import org.apache.lucene.search.cache.DocTermsCreator;
-import org.apache.lucene.search.cache.DocTermsIndexCreator;
-import org.apache.lucene.search.cache.DoubleValuesCreator;
-import org.apache.lucene.search.cache.EntryCreator;
-import org.apache.lucene.search.cache.FloatValuesCreator;
-import org.apache.lucene.search.cache.IntValuesCreator;
-import org.apache.lucene.search.cache.LongValuesCreator;
-import org.apache.lucene.search.cache.ShortValuesCreator;
-import org.apache.lucene.search.cache.CachedArray.ByteValues;
-import org.apache.lucene.search.cache.CachedArray.DoubleValues;
-import org.apache.lucene.search.cache.CachedArray.FloatValues;
-import org.apache.lucene.search.cache.CachedArray.IntValues;
-import org.apache.lucene.search.cache.CachedArray.LongValues;
-import org.apache.lucene.search.cache.CachedArray.ShortValues;
+import org.apache.lucene.search.cache.*;
+import org.apache.lucene.search.cache.CachedArray.*;
import org.apache.lucene.util.FieldCacheSanityChecker;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.*;
+
/**
* Expert: The default cache implementation, storing all values in memory.
* A WeakHashMap is used for storage.
@@ -61,7 +45,7 @@ public class FieldCacheImpl implements F
init();
}
private synchronized void init() {
- caches = new HashMap<Class<?>,Cache>(7);
+ caches = new HashMap<Class<?>,Cache>(9);
caches.put(Byte.TYPE, new Cache<ByteValues>(this));
caches.put(Short.TYPE, new Cache<ShortValues>(this));
caches.put(Integer.TYPE, new Cache<IntValues>(this));
@@ -70,6 +54,7 @@ public class FieldCacheImpl implements F
caches.put(Double.TYPE, new Cache<DoubleValues>(this));
caches.put(DocTermsIndex.class, new Cache<DocTermsIndex>(this));
caches.put(DocTerms.class, new Cache<DocTerms>(this));
+ caches.put(DocTermOrds.class, new Cache<DocTermOrds>(this));
}
public synchronized void purgeAllCaches() {
@@ -393,6 +378,11 @@ public class FieldCacheImpl implements F
return (DocTerms)caches.get(DocTerms.class).get(reader, new Entry(field, creator));
}
+ @SuppressWarnings("unchecked")
+ public DocTermOrds getDocTermOrds(IndexReader reader, String field) throws IOException {
+ return (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new Entry(field, new DocTermOrdsCreator(field, 0)));
+ }
+
private volatile PrintStream infoStream;
public void setInfoStream(PrintStream stream) {
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java Tue Aug 23 14:06:58 2011
@@ -28,6 +28,8 @@ import org.apache.lucene.index.DocsAndPo
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
+// TODO: move this class to oal.index
+
/**
* Abstract class for enumerating a subset of all terms.
*
@@ -42,7 +44,7 @@ import org.apache.lucene.util.Bits;
public abstract class FilteredTermsEnum extends TermsEnum {
private BytesRef initialSeekTerm = null;
- private boolean doSeek = true;
+ private boolean doSeek;
private BytesRef actualTerm = null;
private final TermsEnum tenum;
@@ -64,8 +66,17 @@ public abstract class FilteredTermsEnum
* @param tenum the terms enumeration to filter.
*/
public FilteredTermsEnum(final TermsEnum tenum) {
+ this(tenum, true);
+ }
+
+ /**
+ * Creates a filtered {@link TermsEnum} on a terms enum.
+ * @param tenum the terms enumeration to filter.
+ */
+ public FilteredTermsEnum(final TermsEnum tenum, final boolean startWithSeek) {
assert tenum != null;
this.tenum = tenum;
+ doSeek = startWithSeek;
}
/**
@@ -190,18 +201,23 @@ public abstract class FilteredTermsEnum
@SuppressWarnings("fallthrough")
@Override
public BytesRef next() throws IOException {
+ //System.out.println("FTE.next doSeek=" + doSeek);
+ //new Throwable().printStackTrace(System.out);
for (;;) {
// Seek or forward the iterator
if (doSeek) {
doSeek = false;
final BytesRef t = nextSeekTerm(actualTerm);
+ //System.out.println(" seek to t=" + (t == null ? "null" : t.utf8ToString()) + " tenum=" + tenum);
// Make sure we always seek forward:
assert actualTerm == null || t == null || getComparator().compare(t, actualTerm) > 0: "curTerm=" + actualTerm + " seekTerm=" + t;
if (t == null || tenum.seekCeil(t, false) == SeekStatus.END) {
// no more terms to seek to or enum exhausted
+ //System.out.println(" return null");
return null;
}
actualTerm = tenum.term();
+ //System.out.println(" got term=" + actualTerm.utf8ToString());
} else {
actualTerm = tenum.next();
if (actualTerm == null) {
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java Tue Aug 23 14:06:58 2011
@@ -137,12 +137,10 @@ public class FuzzyQuery extends MultiTer
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
- TermsEnum tenum = terms.iterator();
-
if (!termLongEnough) { // can only match if it's exact
- return new SingleTermsEnum(tenum, term);
+ return new SingleTermsEnum(terms.iterator(), term.bytes());
}
- return new FuzzyTermsEnum(tenum, atts, getTerm(), minimumSimilarity, prefixLength);
+ return new FuzzyTermsEnum(terms, atts, getTerm(), minimumSimilarity, prefixLength);
}
/**
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Tue Aug 23 14:06:58 2011
@@ -17,12 +17,17 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.AutomatonTermsEnum.CompiledAutomaton;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
@@ -34,13 +39,9 @@ import org.apache.lucene.util.automaton.
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.BasicOperations;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.List;
-
/** Subclass of TermsEnum for enumerating all terms that are similar
* to the specified filter term.
*
@@ -72,7 +73,7 @@ public final class FuzzyTermsEnum extend
private int maxEdits;
private final boolean raw;
- private final TermsEnum tenum;
+ private final Terms terms;
private final Term term;
private final int termText[];
private final int realPrefixLength;
@@ -85,7 +86,7 @@ public final class FuzzyTermsEnum extend
* After calling the constructor the enumeration is already pointing to the first
* valid term if such a term exists.
*
- * @param tenum Delivers terms.
+ * @param terms Delivers terms.
* @param atts {@link AttributeSource} created by the rewrite method of {@link MultiTermQuery}
* that contains information about competitive boosts during rewrite. It is also used
* to cache DFAs between segment transitions.
@@ -94,7 +95,7 @@ public final class FuzzyTermsEnum extend
* @param prefixLength Length of required common prefix. Default value is 0.
* @throws IOException
*/
- public FuzzyTermsEnum(TermsEnum tenum, AttributeSource atts, Term term,
+ public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term,
final float minSimilarity, final int prefixLength) throws IOException {
if (minSimilarity >= 1.0f && minSimilarity != (int)minSimilarity)
throw new IllegalArgumentException("fractional edit distances are not allowed");
@@ -102,7 +103,7 @@ public final class FuzzyTermsEnum extend
throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
if(prefixLength < 0)
throw new IllegalArgumentException("prefixLength cannot be less than 0");
- this.tenum = tenum;
+ this.terms = terms;
this.term = term;
// convert the string into a utf32 int[] representation for fast comparisons
@@ -143,8 +144,10 @@ public final class FuzzyTermsEnum extend
throws IOException {
final List<CompiledAutomaton> runAutomata = initAutomata(editDistance);
if (editDistance < runAutomata.size()) {
- return new AutomatonFuzzyTermsEnum(runAutomata.subList(0, editDistance + 1)
- .toArray(new CompiledAutomaton[editDistance + 1]), lastTerm);
+ //if (BlockTreeTermsWriter.DEBUG) System.out.println("FuzzyTE.getAEnum: ed=" + editDistance + " lastTerm=" + (lastTerm==null ? "null" : lastTerm.utf8ToString()));
+ final CompiledAutomaton compiled = runAutomata.get(editDistance);
+ return new AutomatonFuzzyTermsEnum(terms.intersect(compiled, lastTerm == null ? null : compiled.floor(lastTerm, new BytesRef())),
+ runAutomata.subList(0, editDistance + 1).toArray(new CompiledAutomaton[editDistance + 1]));
} else {
return null;
}
@@ -153,6 +156,7 @@ public final class FuzzyTermsEnum extend
/** initialize levenshtein DFAs up to maxDistance, if possible */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
+ //System.out.println("cached automata size: " + runAutomata.size());
if (runAutomata.size() <= maxDistance &&
maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
LevenshteinAutomata builder =
@@ -160,13 +164,14 @@ public final class FuzzyTermsEnum extend
for (int i = runAutomata.size(); i <= maxDistance; i++) {
Automaton a = builder.toAutomaton(i);
+ //System.out.println("compute automaton n=" + i);
// constant prefix
if (realPrefixLength > 0) {
Automaton prefix = BasicAutomata.makeString(
UnicodeUtil.newString(termText, 0, realPrefixLength));
a = BasicOperations.concatenate(prefix, a);
}
- runAutomata.add(new CompiledAutomaton(a, true));
+ runAutomata.add(new CompiledAutomaton(a, true, false));
}
}
return runAutomata;
@@ -301,65 +306,65 @@ public final class FuzzyTermsEnum extend
public BytesRef term() throws IOException {
return actualEnum.term();
}
-
+
/**
- * Implement fuzzy enumeration with automaton.
+ * Implement fuzzy enumeration with Terms.intersect.
* <p>
* This is the fastest method as opposed to LinearFuzzyTermsEnum:
* as enumeration is logarithmic to the number of terms (instead of linear)
* and comparison is linear to length of the term (rather than quadratic)
*/
- private class AutomatonFuzzyTermsEnum extends AutomatonTermsEnum {
+ private class AutomatonFuzzyTermsEnum extends FilteredTermsEnum {
private final ByteRunAutomaton matchers[];
private final BytesRef termRef;
- private final BytesRef lastTerm;
private final BoostAttribute boostAtt =
attributes().addAttribute(BoostAttribute.class);
- public AutomatonFuzzyTermsEnum(CompiledAutomaton compiled[],
- BytesRef lastTerm) throws IOException {
- super(tenum, compiled[compiled.length - 1]);
+ public AutomatonFuzzyTermsEnum(TermsEnum tenum, CompiledAutomaton compiled[])
+ throws IOException {
+ super(tenum, false);
this.matchers = new ByteRunAutomaton[compiled.length];
for (int i = 0; i < compiled.length; i++)
this.matchers[i] = compiled[i].runAutomaton;
- this.lastTerm = lastTerm;
termRef = new BytesRef(term.text());
}
-
+
/** finds the smallest Lev(n) DFA that accepts the term. */
@Override
protected AcceptStatus accept(BytesRef term) {
+ //System.out.println("AFTE.accept term=" + term);
int ed = matchers.length - 1;
- if (matches(term, ed)) { // we match the outer dfa
- // now compute exact edit distance
- while (ed > 0) {
- if (matches(term, ed - 1)) {
- ed--;
- } else {
- break;
- }
- }
-
- // scale to a boost and return (if similarity > minSimilarity)
- if (ed == 0) { // exact match
- boostAtt.setBoost(1.0F);
- return AcceptStatus.YES_AND_SEEK;
+ // we are wrapping either an intersect() TermsEnum or an AutomatonTermsEnum,
+ // so we know the outer DFA always matches.
+ // now compute exact edit distance
+ while (ed > 0) {
+ if (matches(term, ed - 1)) {
+ ed--;
} else {
- final int codePointCount = UnicodeUtil.codePointCount(term);
- final float similarity = 1.0f - ((float) ed / (float)
- (Math.min(codePointCount, termLength)));
- if (similarity > minSimilarity) {
- boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
- return AcceptStatus.YES_AND_SEEK;
- } else {
- return AcceptStatus.NO_AND_SEEK;
- }
+ break;
}
+ }
+ //System.out.println("CHECK term=" + term.utf8ToString() + " ed=" + ed);
+
+ // scale to a boost and return (if similarity > minSimilarity)
+ if (ed == 0) { // exact match
+ boostAtt.setBoost(1.0F);
+ //System.out.println(" yes");
+ return AcceptStatus.YES;
} else {
- return AcceptStatus.NO_AND_SEEK;
+ final int codePointCount = UnicodeUtil.codePointCount(term);
+ final float similarity = 1.0f - ((float) ed / (float)
+ (Math.min(codePointCount, termLength)));
+ if (similarity > minSimilarity) {
+ boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
+ //System.out.println(" yes");
+ return AcceptStatus.YES;
+ } else {
+ return AcceptStatus.NO;
+ }
}
}
@@ -367,16 +372,8 @@ public final class FuzzyTermsEnum extend
final boolean matches(BytesRef term, int k) {
return k == 0 ? term.equals(termRef) : matchers[k].run(term.bytes, term.offset, term.length);
}
-
- /** defers to superclass, except can start at an arbitrary location */
- @Override
- protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
- if (term == null)
- term = lastTerm;
- return super.nextSeekTerm(term);
- }
}
-
+
/**
* Implement fuzzy enumeration with linear brute force.
*/
@@ -408,7 +405,7 @@ public final class FuzzyTermsEnum extend
* @throws IOException
*/
public LinearFuzzyTermsEnum() throws IOException {
- super(tenum);
+ super(terms.iterator());
this.text = new int[termLength - realPrefixLength];
System.arraycopy(termText, realPrefixLength, text, 0, text.length);
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/IndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/IndexSearcher.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/IndexSearcher.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/IndexSearcher.java Tue Aug 23 14:06:58 2011
@@ -17,6 +17,7 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import java.io.Closeable;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
@@ -69,7 +70,7 @@ import org.apache.lucene.util.ThreadInte
* synchronize on the <code>IndexSearcher</code> instance;
* use your own (non-Lucene) objects instead.</p>
*/
-public class IndexSearcher {
+public class IndexSearcher implements Closeable {
final IndexReader reader; // package private for testing!
private boolean closeReader;
@@ -267,6 +268,7 @@ public class IndexSearcher {
* If the IndexReader was supplied implicitly by specifying a directory, then
* the IndexReader is closed.
*/
+ @Override
public void close() throws IOException {
if (closeReader) {
reader.close();
@@ -882,6 +884,6 @@ public class IndexSearcher {
@Override
public String toString() {
- return "IndexSearcher(" + reader + ")";
+ return "IndexSearcher(" + reader + "; executor=" + executor + ")";
}
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixQuery.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixQuery.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixQuery.java Tue Aug 23 14:06:58 2011
@@ -51,7 +51,7 @@ public class PrefixQuery extends MultiTe
// no prefix -- match all terms for this field:
return tenum;
}
- return new PrefixTermsEnum(tenum, prefix);
+ return new PrefixTermsEnum(tenum, prefix.bytes());
}
/** Prints a user-readable version of this query. */
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java Tue Aug 23 14:06:58 2011
@@ -19,7 +19,6 @@ package org.apache.lucene.search;
import java.io.IOException;
-import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
@@ -34,9 +33,9 @@ public class PrefixTermsEnum extends Fil
private final BytesRef prefixRef;
- public PrefixTermsEnum(TermsEnum tenum, Term prefix) throws IOException {
+ public PrefixTermsEnum(TermsEnum tenum, BytesRef prefixText) throws IOException {
super(tenum);
- setInitialSeekTerm(prefixRef = prefix.bytes());
+ setInitialSeekTerm(this.prefixRef = prefixText);
}
@Override
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/Scorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/Scorer.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/Scorer.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/Scorer.java Tue Aug 23 14:06:58 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
-
-import org.apache.lucene.search.BooleanClause.Occur;
+import java.util.Collection;
+import java.util.Collections;
/**
* Expert: Common scoring functionality for different types of queries.
@@ -101,83 +101,30 @@ public abstract class Scorer extends Doc
public float freq() throws IOException {
throw new UnsupportedOperationException(this + " does not implement freq()");
}
-
- /**
- * A callback to gather information from a scorer and its sub-scorers. Each
- * the top-level scorer as well as each of its sub-scorers are passed to
- * either one of the visit methods depending on their boolean relationship in
- * the query.
- * @lucene.experimental
- */
- public static abstract class ScorerVisitor<P extends Query, C extends Query, S extends Scorer> {
- /**
- * Invoked for all optional scorer
- *
- * @param parent the parent query of the child query or <code>null</code> if the child is a top-level query
- * @param child the query of the currently visited scorer
- * @param scorer the current scorer
- */
- public void visitOptional(P parent, C child, S scorer) {}
-
- /**
- * Invoked for all required scorer
- *
- * @param parent the parent query of the child query or <code>null</code> if the child is a top-level query
- * @param child the query of the currently visited scorer
- * @param scorer the current scorer
- */
- public void visitRequired(P parent, C child, S scorer) {}
-
- /**
- * Invoked for all prohibited scorer
- *
- * @param parent the parent query of the child query or <code>null</code> if the child is a top-level query
- * @param child the query of the currently visited scorer
- * @param scorer the current scorer
- */
- public void visitProhibited(P parent, C child, S scorer) {}
- }
-
- /**
- * Expert: call this to gather details for all sub-scorers for this query.
- * This can be used, in conjunction with a custom {@link Collector} to gather
- * details about how each sub-query matched the current hit.
- *
- * @param visitor a callback executed for each sub-scorer
+
+ /** returns parent Weight
* @lucene.experimental
*/
- public void visitScorers(ScorerVisitor<Query, Query, Scorer> visitor) {
- visitSubScorers(null, Occur.MUST/*must id default*/, visitor);
+ public Weight getWeight() {
+ return weight;
}
-
- /**
- * {@link Scorer} subclasses should implement this method if the subclass
- * itself contains multiple scorers to support gathering details for
- * sub-scorers via {@link ScorerVisitor}
- * <p>
- * Note: this method will throw {@link UnsupportedOperationException} if no
- * associated {@link Weight} instance is provided to
- * {@link #Scorer(Weight)}
- * </p>
- *
- * @lucene.experimental
- */
- protected void visitSubScorers(Query parent, Occur relationship,
- ScorerVisitor<Query, Query, Scorer> visitor) {
- if (weight == null)
- throw new UnsupportedOperationException();
-
- final Query q = weight.getQuery();
- switch (relationship) {
- case MUST:
- visitor.visitRequired(parent, q, this);
- break;
- case MUST_NOT:
- visitor.visitProhibited(parent, q, this);
- break;
- case SHOULD:
- visitor.visitOptional(parent, q, this);
- break;
+
+ /** Returns child sub-scorers
+ * @lucene.experimental */
+ public Collection<ChildScorer> getChildren() {
+ return Collections.emptyList();
+ }
+
+ /** a child Scorer and its relationship to its parent.
+ * the meaning of the relationship depends upon the parent query.
+ * @lucene.experimental */
+ public static class ChildScorer {
+ public final Scorer child;
+ public final String relationship;
+
+ public ChildScorer(Scorer child, String relationship) {
+ this.child = child;
+ this.relationship = relationship;
}
}
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java Tue Aug 23 14:06:58 2011
@@ -19,7 +19,6 @@ package org.apache.lucene.search;
import java.io.IOException;
-import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
@@ -39,10 +38,10 @@ public final class SingleTermsEnum exten
* After calling the constructor the enumeration is already pointing to the term,
* if it exists.
*/
- public SingleTermsEnum(TermsEnum tenum, Term singleTerm) throws IOException {
+ public SingleTermsEnum(TermsEnum tenum, BytesRef termText) throws IOException {
super(tenum);
- singleRef = singleTerm.bytes();
- setInitialSeekTerm(singleRef);
+ singleRef = termText;
+ setInitialSeekTerm(termText);
}
@Override
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermQuery.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermQuery.java Tue Aug 23 14:06:58 2011
@@ -24,6 +24,7 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.index.Term;
@@ -42,7 +43,7 @@ public class TermQuery extends Query {
private int docFreq;
private transient TermContext perReaderTermState;
- private class TermWeight extends Weight {
+ final class TermWeight extends Weight {
private final Similarity similarity;
private final Similarity.Stats stats;
private transient TermContext termStates;
@@ -73,21 +74,43 @@ public class TermQuery extends Query {
@Override
public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
- final String field = term.field();
- final IndexReader reader = context.reader;
assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
+ final TermsEnum termsEnum = getTermsEnum(context);
+ if (termsEnum == null) {
+ return null;
+ }
+ // TODO should we reuse the DocsEnum here?
+ final DocsEnum docs = termsEnum.docs(context.reader.getLiveDocs(), null);
+ assert docs != null;
+ return new TermScorer(this, docs, createDocScorer(context));
+ }
+
+ /**
+ * Creates an {@link ExactDocScorer} for this {@link TermWeight}*/
+ ExactDocScorer createDocScorer(AtomicReaderContext context)
+ throws IOException {
+ return similarity.exactDocScorer(stats, term.field(), context);
+ }
+
+ /**
+ * Returns a {@link TermsEnum} positioned at this weight's Term or null if
+ * the term does not exist in the given context
+ */
+ TermsEnum getTermsEnum(AtomicReaderContext context) throws IOException {
final TermState state = termStates.get(context.ord);
if (state == null) { // term is not present in that reader
- assert termNotInReader(reader, field, term.bytes()) : "no termstate found but term exists in reader";
+ assert termNotInReader(context.reader, term.field(), term.bytes()) : "no termstate found but term exists in reader term=" + term;
return null;
}
- final DocsEnum docs = reader.termDocsEnum(reader.getLiveDocs(), field, term.bytes(), state);
- assert docs != null;
- return new TermScorer(this, docs, similarity.exactDocScorer(stats, field, context));
+ //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
+ final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum();
+ termsEnum.seekExact(term.bytes(), state);
+ return termsEnum;
}
private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
// only called from assert
+ //System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString());
final Terms terms = reader.terms(field);
return terms == null || terms.docFreq(bytes) == 0;
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermScorer.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermScorer.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TermScorer.java Tue Aug 23 14:06:58 2011
@@ -71,6 +71,7 @@ final class TermScorer extends Scorer {
public boolean score(Collector c, int end, int firstDocID) throws IOException {
c.setScorer(this);
while (doc < end) { // for docs in window
+ //System.out.println("TS: collect doc=" + doc);
c.collect(doc); // collect score
if (++pointer >= pointerMax) {
refillBuffer();
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java Tue Aug 23 14:06:58 2011
@@ -82,8 +82,13 @@ public abstract class TopScoreDocCollect
assert !Float.isNaN(score);
totalHits++;
+ if (score < pqTop.score) {
+ // Doesn't compete w/ bottom entry in queue
+ return;
+ }
doc += docBase;
- if (score < pqTop.score || (score == pqTop.score && doc > pqTop.doc)) {
+ if (score == pqTop.score && doc > pqTop.doc) {
+ // Break tie in score by doc ID:
return;
}
pqTop.doc = doc;
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java Tue Aug 23 14:06:58 2011
@@ -78,15 +78,38 @@ public abstract class TopTermsRewrite<Q
public void setNextEnum(TermsEnum termsEnum) throws IOException {
this.termsEnum = termsEnum;
this.termComp = termsEnum.getComparator();
+
+ assert compareToLastTerm(null);
+
// lazy init the initial ScoreTerm because comparator is not known on ctor:
if (st == null)
st = new ScoreTerm(this.termComp, new TermContext(topReaderContext));
boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
}
+ // for assert:
+ private BytesRef lastTerm;
+ private boolean compareToLastTerm(BytesRef t) throws IOException {
+ if (lastTerm == null && t != null) {
+ lastTerm = new BytesRef(t);
+ } else if (t == null) {
+ lastTerm = null;
+ } else {
+ assert termsEnum.getComparator().compare(lastTerm, t) < 0: "lastTerm=" + lastTerm + " t=" + t;
+ lastTerm.copy(t);
+ }
+ return true;
+ }
+
@Override
public boolean collect(BytesRef bytes) throws IOException {
final float boost = boostAtt.getBoost();
+
+ // make sure within a single seg we always collect
+ // terms in order
+ assert compareToLastTerm(bytes);
+
+ //System.out.println("TTR.collect term=" + bytes.utf8ToString() + " boost=" + boost + " ord=" + readerContext.ord);
// ignore uncompetitive hits
if (stQueue.size() == maxSize) {
final ScoreTerm t = stQueue.peek();
@@ -134,9 +157,10 @@ public abstract class TopTermsRewrite<Q
final Q q = getTopLevelQuery();
final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
ArrayUtil.mergeSort(scoreTerms, scoreTermSortByTermComp);
+
for (final ScoreTerm st : scoreTerms) {
final Term term = new Term(query.field, st.bytes);
- assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq();
+ assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq() + " term=" + term;
addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query
}
query.incTotalNumberOfTerms(scoreTerms.length);
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java Tue Aug 23 14:06:58 2011
@@ -43,16 +43,30 @@ public final class ByteArrayDataInput ex
reset(bytes, 0, bytes.length);
}
+ // NOTE: sets pos to 0, which is not right if you had
+ // called reset w/ non-zero offset!!
+ public void rewind() {
+ pos = 0;
+ }
+
public int getPosition() {
return pos;
}
+ public void setPosition(int pos) {
+ this.pos = pos;
+ }
+
public void reset(byte[] bytes, int offset, int len) {
this.bytes = bytes;
pos = offset;
limit = offset + len;
}
+ public int length() {
+ return limit;
+ }
+
public boolean eof() {
return pos == limit;
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java Tue Aug 23 14:06:58 2011
@@ -19,10 +19,6 @@ package org.apache.lucene.store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.Lock;
import org.apache.lucene.util.IOUtils;
import java.util.Collection;
@@ -189,14 +185,14 @@ public abstract class CompoundFileDirect
}
@Override
- public synchronized IndexInput openInput(String id, IOContext context) throws IOException {
+ public synchronized IndexInput openInput(String fileName, IOContext context) throws IOException {
ensureOpen();
assert !openForWrite;
- id = IndexFileNames.stripSegmentName(id);
+ final String id = IndexFileNames.stripSegmentName(fileName);
final FileEntry entry = entries.get(id);
- if (entry == null)
- throw new IOException("No sub-file with id " + id + " found (files: " + entries.keySet() + ")");
-
+ if (entry == null) {
+ throw new IOException("No sub-file with id " + id + " found (fileName=" + fileName + " files: " + entries.keySet() + ")");
+ }
return openInputSlice(id, entry.offset, entry.length, readBufferSize);
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FSDirectory.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FSDirectory.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FSDirectory.java Tue Aug 23 14:06:58 2011
@@ -448,6 +448,7 @@ public abstract class FSDirectory extend
/** output methods: */
@Override
public void flushBuffer(byte[] b, int offset, int size) throws IOException {
+ assert isOpen;
if (rateLimiter != null) {
rateLimiter.pause(size);
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java Tue Aug 23 14:06:58 2011
@@ -78,11 +78,38 @@ public class FileSwitchDirectory extends
@Override
public String[] listAll() throws IOException {
Set<String> files = new HashSet<String>();
- for(String f : primaryDir.listAll()) {
- files.add(f);
+ // LUCENE-3380: either or both of our dirs could be FSDirs,
+ // but if one underlying delegate is an FSDir and mkdirs() has not
+ // yet been called, because so far everything is written to the other,
+ // in this case, we don't want to throw a NoSuchDirectoryException
+ NoSuchDirectoryException exc = null;
+ try {
+ for(String f : primaryDir.listAll()) {
+ files.add(f);
+ }
+ } catch (NoSuchDirectoryException e) {
+ exc = e;
}
- for(String f : secondaryDir.listAll()) {
- files.add(f);
+ try {
+ for(String f : secondaryDir.listAll()) {
+ files.add(f);
+ }
+ } catch (NoSuchDirectoryException e) {
+ // we got NoSuchDirectoryException from both dirs
+ // rethrow the first.
+ if (exc != null) {
+ throw exc;
+ }
+ // we got NoSuchDirectoryException from the secondary,
+ // and the primary is empty.
+ if (files.isEmpty()) {
+ throw e;
+ }
+ }
+ // we got NoSuchDirectoryException from the primary,
+ // and the secondary is empty.
+ if (exc != null && files.isEmpty()) {
+ throw exc;
}
return files.toArray(new String[files.size()]);
}
@@ -150,13 +177,19 @@ public class FileSwitchDirectory extends
return getDirectory(name).openInput(name, context);
}
+ // final due to LUCENE-3380: currently CFS backdoors the directory to create CFE
+ // by using the basic implementation and not delegating, we ensure that all
+ // openInput/createOutput requests come thru NRTCachingDirectory.
@Override
- public CompoundFileDirectory openCompoundInput(String name, IOContext context) throws IOException {
- return getDirectory(name).openCompoundInput(name, context);
+ public final CompoundFileDirectory openCompoundInput(String name, IOContext context) throws IOException {
+ return super.openCompoundInput(name, context);
}
+ // final due to LUCENE-3380: currently CFS backdoors the directory to create CFE
+ // by using the basic implementation and not delegating, we ensure that all
+ // openInput/createOutput requests come thru NRTCachingDirectory.
@Override
- public CompoundFileDirectory createCompoundOutput(String name, IOContext context) throws IOException {
- return getDirectory(name).createCompoundOutput(name, context);
+ public final CompoundFileDirectory createCompoundOutput(String name, IOContext context) throws IOException {
+ return super.createCompoundOutput(name, context);
}
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMFile.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMFile.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMFile.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMFile.java Tue Aug 23 14:06:58 2011
@@ -29,7 +29,7 @@ public class RAMFile {
private long lastModified = System.currentTimeMillis();
// File used as buffer, in no RAMDirectory
- protected RAMFile() {}
+ public RAMFile() {}
RAMFile(RAMDirectory directory) {
this.directory = directory;
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMInputStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMInputStream.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMInputStream.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/store/RAMInputStream.java Tue Aug 23 14:06:58 2011
@@ -19,8 +19,10 @@ package org.apache.lucene.store;
import java.io.IOException;
-/** A memory-resident {@link IndexInput} implementation. */
-class RAMInputStream extends IndexInput implements Cloneable {
+/** A memory-resident {@link IndexInput} implementation.
+ *
+ * @lucene.internal */
+public class RAMInputStream extends IndexInput implements Cloneable {
static final int BUFFER_SIZE = RAMOutputStream.BUFFER_SIZE;
private RAMFile file;
@@ -33,7 +35,7 @@ class RAMInputStream extends IndexInput
private long bufferStart;
private int bufferLength;
- RAMInputStream(RAMFile f) throws IOException {
+ public RAMInputStream(RAMFile f) throws IOException {
file = f;
length = file.length;
if (length/BUFFER_SIZE >= Integer.MAX_VALUE) {
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BitVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BitVector.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BitVector.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BitVector.java Tue Aug 23 14:06:58 2011
@@ -353,6 +353,11 @@ public final class BitVector implements
} else {
readBits(input);
}
+
+ if (version < VERSION_DGAPS_CLEARED) {
+ invertAll();
+ }
+
assert verifyCount();
} finally {
input.close();
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BytesRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BytesRef.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BytesRef.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/BytesRef.java Tue Aug 23 14:06:58 2011
@@ -65,6 +65,18 @@ public final class BytesRef implements C
this.bytes = new byte[capacity];
}
+ /** Incoming IntsRef values must be Byte.MIN_VALUE -
+ * Byte.MAX_VALUE. */
+ public BytesRef(IntsRef intsRef) {
+ bytes = new byte[intsRef.length];
+ for(int idx=0;idx<intsRef.length;idx++) {
+ final int v = intsRef.ints[intsRef.offset + idx];
+ assert v >= Byte.MIN_VALUE && v <= Byte.MAX_VALUE;
+ bytes[idx] = (byte) v;
+ }
+ length = intsRef.length;
+ }
+
/**
* @param text Initialize the byte[] from the UTF8 bytes
* for the provided String. This must be well-formed
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/IOUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/IOUtils.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/IOUtils.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/IOUtils.java Tue Aug 23 14:06:58 2011
@@ -19,8 +19,11 @@ package org.apache.lucene.util;
import java.io.Closeable;
import java.io.IOException;
+import java.lang.reflect.Method;
-/** @lucene.internal */
+/** This class emulates the new Java 7 "Try-With-Resources" statement.
+ * Remove once Lucene is on Java 7.
+ * @lucene.internal */
public final class IOUtils {
private IOUtils() {} // no instance
@@ -55,6 +58,7 @@ public final class IOUtils {
object.close();
}
} catch (Throwable t) {
+ addSuppressed((priorException == null) ? th : priorException, t);
if (th == null) {
th = t;
}
@@ -81,6 +85,7 @@ public final class IOUtils {
object.close();
}
} catch (Throwable t) {
+ addSuppressed((priorException == null) ? th : priorException, t);
if (th == null) {
th = t;
}
@@ -118,6 +123,7 @@ public final class IOUtils {
object.close();
}
} catch (Throwable t) {
+ addSuppressed(th, t);
if (th == null)
th = t;
}
@@ -143,6 +149,7 @@ public final class IOUtils {
object.close();
}
} catch (Throwable t) {
+ addSuppressed(th, t);
if (th == null)
th = t;
}
@@ -155,5 +162,31 @@ public final class IOUtils {
throw new RuntimeException(th);
}
}
+
+ /** This reflected {@link Method} is {@code null} before Java 7 */
+ private static final Method SUPPRESS_METHOD;
+ static {
+ Method m;
+ try {
+ m = Throwable.class.getMethod("addSuppressed", Throwable.class);
+ } catch (Exception e) {
+ m = null;
+ }
+ SUPPRESS_METHOD = m;
+ }
+
+ /** adds a Throwable to the list of suppressed Exceptions of the first Throwable (if Java 7 is detected)
+ * @param exception this exception should get the suppressed one added
+ * @param suppressed the suppressed exception
+ */
+ private static final void addSuppressed(Throwable exception, Throwable suppressed) {
+ if (SUPPRESS_METHOD != null && exception != null && suppressed != null) {
+ try {
+ SUPPRESS_METHOD.invoke(exception, suppressed);
+ } catch (Exception e) {
+ // ignore any exceptions caused by invoking (e.g. security constraints)
+ }
+ }
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/TermContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/TermContext.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/TermContext.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/TermContext.java Tue Aug 23 14:06:58 2011
@@ -21,14 +21,13 @@ import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.index.IndexReader.ReaderContext;
-import org.apache.lucene.index.TermsEnum.SeekStatus;
/**
* Maintains a {@link IndexReader} {@link TermState} view over
@@ -45,6 +44,9 @@ public final class TermContext {
private int docFreq;
private long totalTermFreq;
+ //public static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
+
+
/**
* Creates an empty {@link TermContext} from a {@link ReaderContext}
*/
@@ -85,7 +87,9 @@ public final class TermContext {
final BytesRef bytes = term.bytes();
final TermContext perReaderTermState = new TermContext(context);
final AtomicReaderContext[] leaves = ReaderUtil.leaves(context);
+ //if (DEBUG) System.out.println("prts.build term=" + term);
for (int i = 0; i < leaves.length; i++) {
+ //if (DEBUG) System.out.println(" r=" + leaves[i].reader);
final Fields fields = leaves[i].reader.fields();
if (fields != null) {
final Terms terms = fields.terms(field);
@@ -93,6 +97,7 @@ public final class TermContext {
final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share!
if (termsEnum.seekExact(bytes, cache)) {
final TermState termState = termsEnum.termState();
+ //if (DEBUG) System.out.println(" found");
perReaderTermState.register(termState, leaves[i].ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
}
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Builder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Builder.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Builder.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Builder.java Tue Aug 23 14:06:58 2011
@@ -53,6 +53,8 @@ public class Builder<T> {
private final FST<T> fst;
private final T NO_OUTPUT;
+ // private static final boolean DEBUG = false;
+
// simplistic pruning: we prune node (and all following
// nodes) if less than this number of terms go through it:
private final int minSuffixCount1;
@@ -73,13 +75,21 @@ public class Builder<T> {
// current "frontier"
private UnCompiledNode<T>[] frontier;
+ // Expert: you pass an instance of this if you want to do
+ // something "custom" as suffixes are "frozen":
+ public static abstract class FreezeTail<T> {
+ public abstract void freeze(final UnCompiledNode<T>[] frontier, int prefixLenPlus1, IntsRef prevInput) throws IOException;
+ }
+
+ private final FreezeTail<T> freezeTail;
+
/**
* Instantiates an FST/FSA builder without any pruning. A shortcut
- * to {@link #Builder(FST.INPUT_TYPE, int, int, boolean, boolean, int, Outputs)} with
+ * to {@link #Builder(FST.INPUT_TYPE, int, int, boolean, boolean, int, Outputs, FreezeTail)} with
* pruning options turned off.
*/
public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
- this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs);
+ this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null);
}
/**
@@ -120,9 +130,11 @@ public class Builder<T> {
* singleton output object.
*/
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
- boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs) {
+ boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
+ FreezeTail<T> freezeTail) {
this.minSuffixCount1 = minSuffixCount1;
this.minSuffixCount2 = minSuffixCount2;
+ this.freezeTail = freezeTail;
this.doShareNonSingletonNodes = doShareNonSingletonNodes;
this.shareMaxTailLength = shareMaxTailLength;
fst = new FST<T>(inputType, outputs);
@@ -179,94 +191,100 @@ public class Builder<T> {
return fn;
}
- private void compilePrevTail(int prefixLenPlus1) throws IOException {
- assert prefixLenPlus1 >= 1;
- //System.out.println(" compileTail " + prefixLenPlus1);
- for(int idx=lastInput.length; idx >= prefixLenPlus1; idx--) {
- boolean doPrune = false;
- boolean doCompile = false;
+ private void freezeTail(int prefixLenPlus1) throws IOException {
+ if (freezeTail != null) {
+ // Custom plugin:
+ freezeTail.freeze(frontier, prefixLenPlus1, lastInput);
+ } else {
+ //System.out.println(" compileTail " + prefixLenPlus1);
+ final int downTo = Math.max(1, prefixLenPlus1);
+ for(int idx=lastInput.length; idx >= downTo; idx--) {
- final UnCompiledNode<T> node = frontier[idx];
- final UnCompiledNode<T> parent = frontier[idx-1];
+ boolean doPrune = false;
+ boolean doCompile = false;
- if (node.inputCount < minSuffixCount1) {
- doPrune = true;
- doCompile = true;
- } else if (idx > prefixLenPlus1) {
- // prune if parent's inputCount is less than suffixMinCount2
- if (parent.inputCount < minSuffixCount2 || minSuffixCount2 == 1 && parent.inputCount == 1) {
- // my parent, about to be compiled, doesn't make the cut, so
- // I'm definitely pruned
-
- // if pruneCount2 is 1, we keep only up
- // until the 'distinguished edge', ie we keep only the
- // 'divergent' part of the FST. if my parent, about to be
- // compiled, has inputCount 1 then we are already past the
- // distinguished edge. NOTE: this only works if
- // the FST outputs are not "compressible" (simple
- // ords ARE compressible).
+ final UnCompiledNode<T> node = frontier[idx];
+ final UnCompiledNode<T> parent = frontier[idx-1];
+
+ if (node.inputCount < minSuffixCount1) {
doPrune = true;
+ doCompile = true;
+ } else if (idx > prefixLenPlus1) {
+ // prune if parent's inputCount is less than minSuffixCount2
+ if (parent.inputCount < minSuffixCount2 || (minSuffixCount2 == 1 && parent.inputCount == 1 && idx > 1)) {
+ // my parent, about to be compiled, doesn't make the cut, so
+ // I'm definitely pruned
+
+ // if minSuffixCount2 is 1, we keep only up
+ // until the 'distinguished edge', ie we keep only the
+ // 'divergent' part of the FST. if my parent, about to be
+ // compiled, has inputCount 1 then we are already past the
+ // distinguished edge. NOTE: this only works if
+ // the FST outputs are not "compressible" (simple
+ // ords ARE compressible).
+ doPrune = true;
+ } else {
+ // my parent, about to be compiled, does make the cut, so
+ // I'm definitely not pruned
+ doPrune = false;
+ }
+ doCompile = true;
} else {
- // my parent, about to be compiled, does make the cut, so
- // I'm definitely not pruned
- doPrune = false;
+ // if pruning is disabled (count is 0) we can always
+ // compile current node
+ doCompile = minSuffixCount2 == 0;
}
- doCompile = true;
- } else {
- // if pruning is disabled (count is 0) we can always
- // compile current node
- doCompile = minSuffixCount2 == 0;
- }
- //System.out.println(" label=" + ((char) lastInput.ints[lastInput.offset+idx-1]) + " idx=" + idx + " inputCount=" + frontier[idx].inputCount + " doCompile=" + doCompile + " doPrune=" + doPrune);
-
- if (node.inputCount < minSuffixCount2 || minSuffixCount2 == 1 && node.inputCount == 1) {
- // drop all arcs
- for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
- @SuppressWarnings("unchecked") final UnCompiledNode<T> target = (UnCompiledNode<T>) node.arcs[arcIdx].target;
- target.clear();
- }
- node.numArcs = 0;
- }
+ //System.out.println(" label=" + ((char) lastInput.ints[lastInput.offset+idx-1]) + " idx=" + idx + " inputCount=" + frontier[idx].inputCount + " doCompile=" + doCompile + " doPrune=" + doPrune);
- if (doPrune) {
- // this node doesn't make it -- deref it
- node.clear();
- parent.deleteLast(lastInput.ints[lastInput.offset+idx-1], node);
- } else {
-
- if (minSuffixCount2 != 0) {
- compileAllTargets(node, lastInput.length-idx);
+ if (node.inputCount < minSuffixCount2 || (minSuffixCount2 == 1 && node.inputCount == 1 && idx > 1)) {
+ // drop all arcs
+ for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
+ @SuppressWarnings("unchecked") final UnCompiledNode<T> target = (UnCompiledNode<T>) node.arcs[arcIdx].target;
+ target.clear();
+ }
+ node.numArcs = 0;
}
- final T nextFinalOutput = node.output;
- // We "fake" the node as being final if it has no
- // outgoing arcs; in theory we could leave it
- // as non-final (the FST can represent this), but
- // FSTEnum, Util, etc., have trouble w/ non-final
- // dead-end states:
- final boolean isFinal = node.isFinal || node.numArcs == 0;
-
- if (doCompile) {
- // this node makes it and we now compile it. first,
- // compile any targets that were previously
- // undecided:
- parent.replaceLast(lastInput.ints[lastInput.offset + idx-1],
- compileNode(node, 1+lastInput.length-idx),
- nextFinalOutput,
- isFinal);
+ if (doPrune) {
+ // this node doesn't make it -- deref it
+ node.clear();
+ parent.deleteLast(lastInput.ints[lastInput.offset+idx-1], node);
} else {
- // replaceLast just to install
- // nextFinalOutput/isFinal onto the arc
- parent.replaceLast(lastInput.ints[lastInput.offset + idx-1],
- node,
- nextFinalOutput,
- isFinal);
- // this node will stay in play for now, since we are
- // undecided on whether to prune it. later, it
- // will be either compiled or pruned, so we must
- // allocate a new node:
- frontier[idx] = new UnCompiledNode<T>(this, idx);
+
+ if (minSuffixCount2 != 0) {
+ compileAllTargets(node, lastInput.length-idx);
+ }
+ final T nextFinalOutput = node.output;
+
+ // We "fake" the node as being final if it has no
+ // outgoing arcs; in theory we could leave it
+ // as non-final (the FST can represent this), but
+ // FSTEnum, Util, etc., have trouble w/ non-final
+ // dead-end states:
+ final boolean isFinal = node.isFinal || node.numArcs == 0;
+
+ if (doCompile) {
+ // this node makes it and we now compile it. first,
+ // compile any targets that were previously
+ // undecided:
+ parent.replaceLast(lastInput.ints[lastInput.offset + idx-1],
+ compileNode(node, 1+lastInput.length-idx),
+ nextFinalOutput,
+ isFinal);
+ } else {
+ // replaceLast just to install
+ // nextFinalOutput/isFinal onto the arc
+ parent.replaceLast(lastInput.ints[lastInput.offset + idx-1],
+ node,
+ nextFinalOutput,
+ isFinal);
+ // this node will stay in play for now, since we are
+ // undecided on whether to prune it. later, it
+ // will be either compiled or pruned, so we must
+ // allocate a new node:
+ frontier[idx] = new UnCompiledNode<T>(this, idx);
+ }
}
}
}
@@ -320,11 +338,36 @@ public class Builder<T> {
add(scratchIntsRef, output);
}
+ // for debugging
+ /*
+ private String toString(BytesRef b) {
+ try {
+ return b.utf8ToString() + " " + b;
+ } catch (Throwable t) {
+ return b.toString();
+ }
+ }
+ */
+
/** It's OK to add the same input twice in a row with
* different outputs, as long as outputs impls the merge
* method. */
public void add(IntsRef input, T output) throws IOException {
- //System.out.println("\nFST ADD: input=" + input + " output=" + fst.outputs.outputToString(output));
+ /*
+ if (DEBUG) {
+ BytesRef b = new BytesRef(input.length);
+ for(int x=0;x<input.length;x++) {
+ b.bytes[x] = (byte) input.ints[x];
+ }
+ b.length = input.length;
+ if (output == NO_OUTPUT) {
+ System.out.println("\nFST ADD: input=" + toString(b) + " " + b);
+ } else {
+ System.out.println("\nFST ADD: input=" + toString(b) + " " + b + " output=" + fst.outputs.outputToString(output));
+ }
+ }
+ */
+
assert lastInput.length == 0 || input.compareTo(lastInput) >= 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
assert validOutput(output);
@@ -346,8 +389,8 @@ public class Builder<T> {
int pos2 = input.offset;
final int pos1Stop = Math.min(lastInput.length, input.length);
while(true) {
- //System.out.println(" incr " + pos1);
frontier[pos1].inputCount++;
+ //System.out.println(" incr " + pos1 + " ct=" + frontier[pos1].inputCount + " n=" + frontier[pos1]);
if (pos1 >= pos1Stop || lastInput.ints[pos1] != input.ints[pos2]) {
break;
}
@@ -368,13 +411,12 @@ public class Builder<T> {
// minimize/compile states from previous input's
// orphan'd suffix
- compilePrevTail(prefixLenPlus1);
+ freezeTail(prefixLenPlus1);
// init tail states for current input
for(int idx=prefixLenPlus1;idx<=input.length;idx++) {
frontier[idx-1].addArc(input.ints[input.offset + idx - 1],
frontier[idx]);
- //System.out.println(" incr tail " + idx);
frontier[idx].inputCount++;
}
@@ -433,34 +475,25 @@ public class Builder<T> {
* nothing is accepted by the FST. */
public FST<T> finish() throws IOException {
+ final UnCompiledNode<T> root = frontier[0];
+
// minimize nodes in the last word's suffix
- compilePrevTail(1);
- //System.out.println("finish: inputCount=" + frontier[0].inputCount);
- if (frontier[0].inputCount < minSuffixCount1 || frontier[0].inputCount < minSuffixCount2 || frontier[0].numArcs == 0) {
+ freezeTail(0);
+ if (root.inputCount < minSuffixCount1 || root.inputCount < minSuffixCount2 || root.numArcs == 0) {
if (fst.emptyOutput == null) {
return null;
} else if (minSuffixCount1 > 0 || minSuffixCount2 > 0) {
// empty string got pruned
return null;
- } else {
- fst.finish(compileNode(frontier[0], lastInput.length).address);
- //System.out.println("compile addr = " + fst.getStartNode());
- return fst;
}
} else {
if (minSuffixCount2 != 0) {
- compileAllTargets(frontier[0], lastInput.length);
+ compileAllTargets(root, lastInput.length);
}
- //System.out.println("NOW: " + frontier[0].numArcs);
- fst.finish(compileNode(frontier[0], lastInput.length).address);
}
+ //if (DEBUG) System.out.println(" builder.finish root.isFinal=" + root.isFinal + " root.output=" + root.output);
+ fst.finish(compileNode(root, lastInput.length).address);
- /*
- if (dedupHash != null) {
- System.out.println("NH: " + dedupHash.count());
- }
- */
-
return fst;
}
@@ -479,7 +512,7 @@ public class Builder<T> {
}
}
- static class Arc<T> {
+ public static class Arc<T> {
public int label; // really an "unsigned" byte
public Node target;
public boolean isFinal;
@@ -502,16 +535,20 @@ public class Builder<T> {
}
}
- static final class UnCompiledNode<T> implements Node {
+ public static final class UnCompiledNode<T> implements Node {
final Builder<T> owner;
- int numArcs;
- Arc<T>[] arcs;
- T output;
- boolean isFinal;
- long inputCount;
+ public int numArcs;
+ public Arc<T>[] arcs;
+ // TODO: instead of recording isFinal/output on the
+ // node, maybe we should use -1 arc to mean "end" (like
+ // we do when reading the FST). Would simplify much
+ // code here...
+ public T output;
+ public boolean isFinal;
+ public long inputCount;
/** This node's depth, starting from the automaton root. */
- final int depth;
+ public final int depth;
/**
* @param depth
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java Tue Aug 23 14:06:58 2011
@@ -133,6 +133,6 @@ public final class ByteSequenceOutputs e
@Override
public String outputToString(BytesRef output) {
- return output.utf8ToString();
+ return output.toString();
}
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/FST.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/FST.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/FST.java Tue Aug 23 14:06:58 2011
@@ -123,7 +123,7 @@ public class FST<T> {
public int label;
public T output;
- int target;
+ public int target;
byte flags;
public T nextFinalOutput;
@@ -274,6 +274,10 @@ public class FST<T> {
}
}
+ public T getEmptyOutput() {
+ return emptyOutput;
+ }
+
void setEmptyOutput(T v) throws IOException {
if (emptyOutput != null) {
emptyOutput = outputs.merge(emptyOutput, v);
@@ -597,9 +601,9 @@ public class FST<T> {
arc.label = END_LABEL;
arc.output = follow.nextFinalOutput;
if (follow.target <= 0) {
- arc.flags = BIT_LAST_ARC;
+ arc.flags = BIT_LAST_ARC | BIT_FINAL_ARC;
} else {
- arc.flags = 0;
+ arc.flags = BIT_FINAL_ARC;
arc.nextArc = follow.target;
}
//System.out.println(" insert isFinal; nextArc=" + follow.target + " isLast=" + arc.isLast() + " output=" + outputs.outputToString(arc.output));
@@ -609,8 +613,7 @@ public class FST<T> {
}
}
- // Not private because NodeHash needs access:
- Arc<T> readFirstRealArc(int address, Arc<T> arc) throws IOException {
+ public Arc<T> readFirstRealArc(int address, Arc<T> arc) throws IOException {
final BytesReader in = getBytesReader(address);
@@ -693,7 +696,9 @@ public class FST<T> {
return readLabel(in);
}
- Arc<T> readNextRealArc(Arc<T> arc, final BytesReader in) throws IOException {
+ /** Never returns null, but you should never call this if
+ * arc.isLast() is true. */
+ public Arc<T> readNextRealArc(Arc<T> arc, final BytesReader in) throws IOException {
// this is a continuing arc in a fixed array
if (arc.bytesPerArc != 0) {
// arcs are at fixed entries
@@ -925,7 +930,7 @@ public class FST<T> {
}
}
- final BytesReader getBytesReader(int pos) {
+ public final BytesReader getBytesReader(int pos) {
// TODO: maybe re-use via ThreadLocal?
return new BytesReader(pos);
}
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java Tue Aug 23 14:06:58 2011
@@ -39,8 +39,8 @@ import org.apache.lucene.store.DataOutpu
public final class UpToTwoPositiveIntOutputs extends Outputs<Object> {
public final static class TwoLongs {
- final long first;
- final long second;
+ public final long first;
+ public final long second;
public TwoLongs(long first, long second) {
this.first = first;
Modified: lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Util.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Util.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Util.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/java/org/apache/lucene/util/fst/Util.java Tue Aug 23 14:06:58 2011
@@ -213,6 +213,7 @@ public final class Util {
// Shape for states.
final String stateShape = "circle";
+ final String finalStateShape = "doublecircle";
// Emit DOT prologue.
out.write("digraph FST {\n");
@@ -223,12 +224,34 @@ public final class Util {
}
emitDotState(out, "initial", "point", "white", "");
- emitDotState(out, Integer.toString(startArc.target), stateShape,
- fst.isExpandedTarget(startArc) ? expandedNodeColor : null,
- "");
- out.write(" initial -> " + startArc.target + "\n");
final T NO_OUTPUT = fst.outputs.getNoOutput();
+
+ // final FST.Arc<T> scratchArc = new FST.Arc<T>();
+
+ {
+ final String stateColor;
+ if (fst.isExpandedTarget(startArc)) {
+ stateColor = expandedNodeColor;
+ } else {
+ stateColor = null;
+ }
+
+ final boolean isFinal;
+ final T finalOutput;
+ if (startArc.isFinal()) {
+ isFinal = true;
+ finalOutput = startArc.nextFinalOutput == NO_OUTPUT ? null : startArc.nextFinalOutput;
+ } else {
+ isFinal = false;
+ finalOutput = null;
+ }
+
+ emitDotState(out, Integer.toString(startArc.target), isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.outputs.outputToString(finalOutput));
+ }
+
+ out.write(" initial -> " + startArc.target + "\n");
+
int level = 0;
while (!nextLevelQueue.isEmpty()) {
@@ -240,19 +263,48 @@ public final class Util {
out.write("\n // Transitions and states at level: " + level + "\n");
while (!thisLevelQueue.isEmpty()) {
final FST.Arc<T> arc = thisLevelQueue.remove(thisLevelQueue.size() - 1);
-
if (fst.targetHasArcs(arc)) {
// scan all arcs
final int node = arc.target;
fst.readFirstTargetArc(arc, arc);
-
+
+ if (arc.label == FST.END_LABEL) {
+ // Skip it -- prior recursion took this into account already
+ assert !arc.isLast();
+ fst.readNextArc(arc);
+ }
+
while (true) {
+
// Emit the unseen state and add it to the queue for the next level.
if (arc.target >= 0 && !seen.get(arc.target)) {
- final boolean isExpanded = fst.isExpandedTarget(arc);
- emitDotState(out, Integer.toString(arc.target), stateShape,
- isExpanded ? expandedNodeColor : null,
- labelStates ? Integer.toString(arc.target) : "");
+
+ /*
+ boolean isFinal = false;
+ T finalOutput = null;
+ fst.readFirstTargetArc(arc, scratchArc);
+ if (scratchArc.isFinal() && fst.targetHasArcs(scratchArc)) {
+ // target is final
+ isFinal = true;
+ finalOutput = scratchArc.output == NO_OUTPUT ? null : scratchArc.output;
+ System.out.println("dot hit final label=" + (char) scratchArc.label);
+ }
+ */
+ final String stateColor;
+ if (fst.isExpandedTarget(arc)) {
+ stateColor = expandedNodeColor;
+ } else {
+ stateColor = null;
+ }
+
+ final String finalOutput;
+ if (arc.nextFinalOutput != null && arc.nextFinalOutput != NO_OUTPUT) {
+ finalOutput = fst.outputs.outputToString(arc.nextFinalOutput);
+ } else {
+ finalOutput = "";
+ }
+
+ emitDotState(out, Integer.toString(arc.target), arc.isFinal() ? finalStateShape : stateShape, stateColor, finalOutput);
seen.set(arc.target);
nextLevelQueue.add(new FST.Arc<T>().copyFrom(arc));
sameLevelStates.add(arc.target);
@@ -265,15 +317,19 @@ public final class Util {
outs = "";
}
- final String cl;
- if (arc.label == FST.END_LABEL) {
- cl = "~";
- } else {
- cl = printableLabel(arc.label);
+ if (!fst.targetHasArcs(arc) && arc.isFinal() && arc.nextFinalOutput != NO_OUTPUT) {
+ // Tricky special case: sometimes, due to
+ // pruning, the builder can [sillily] produce
+ // an FST with an arc into the final end state
+ // (-1) but also with a next final output; in
+ // this case we pull that output up onto this
+ // arc
+ outs = outs + "/[" + fst.outputs.outputToString(arc.nextFinalOutput) + "]";
}
- out.write(" " + node + " -> " + arc.target + " [label=\"" + cl + outs + "\"]\n");
-
+ assert arc.label != FST.END_LABEL;
+ out.write(" " + node + " -> " + arc.target + " [label=\"" + printableLabel(arc.label) + outs + "\"]\n");
+
// Break the loop if we're on the last arc of this state.
if (arc.isLast()) {
break;
@@ -295,7 +351,7 @@ public final class Util {
}
// Emit terminating state (always there anyway).
- out.write(" -1 [style=filled, color=black, shape=circle, label=\"\"]\n\n");
+ out.write(" -1 [style=filled, color=black, shape=doublecircle, label=\"\"]\n\n");
out.write(" {rank=sink; -1 }\n");
out.write("}\n");
Modified: lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java Tue Aug 23 14:06:58 2011
@@ -29,6 +29,9 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.similarities.SimilarityProvider;
import org.apache.lucene.store.Directory;
@@ -251,4 +254,21 @@ class DocHelper {
public static int numFields(Document doc) {
return doc.getFields().size();
}
+
+ public static Document createDocument(int n, String indexName, int numFields) {
+ StringBuilder sb = new StringBuilder();
+ Document doc = new Document();
+ doc.add(new Field("id", Integer.toString(n), Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
+ doc.add(new Field("indexname", indexName, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
+ sb.append("a");
+ sb.append(n);
+ doc.add(new Field("field1", sb.toString(), Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
+ sb.append(" b");
+ sb.append(n);
+ for (int i = 1; i < numFields; i++) {
+ doc.add(new Field("field" + (i + 1), sb.toString(), Store.YES,
+ Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
+ }
+ return doc;
+ }
}
Modified: lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java?rev=1160700&r1=1160699&r2=1160700&view=diff
==============================================================================
--- lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java (original)
+++ lucene/dev/branches/flexscoring/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java Tue Aug 23 14:06:58 2011
@@ -29,6 +29,7 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.index.IndexWriter; // javadoc
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.values.ValueType;
+import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
@@ -59,7 +60,7 @@ public class RandomIndexWriter implement
private final Random r;
- public MockIndexWriter(Random r,Directory dir, IndexWriterConfig conf) throws IOException {
+ public MockIndexWriter(Random r, Directory dir, IndexWriterConfig conf) throws IOException {
super(dir, conf);
// must make a private random since our methods are
// called from different threads; else test failures may
@@ -298,6 +299,10 @@ public class RandomIndexWriter implement
public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
w.deleteDocuments(term);
}
+
+ public void deleteDocuments(Query q) throws CorruptIndexException, IOException {
+ w.deleteDocuments(q);
+ }
public void commit() throws CorruptIndexException, IOException {
w.commit();
@@ -321,11 +326,16 @@ public class RandomIndexWriter implement
}
private boolean doRandomOptimize = true;
+ private boolean doRandomOptimizeAssert = true;
public void setDoRandomOptimize(boolean v) {
doRandomOptimize = v;
}
+ public void setDoRandomOptimizeAssert(boolean v) {
+ doRandomOptimizeAssert = v;
+ }
+
private void doRandomOptimize() throws IOException {
if (doRandomOptimize) {
final int segCount = w.getSegmentCount();
@@ -336,7 +346,7 @@ public class RandomIndexWriter implement
// partial optimize
final int limit = _TestUtil.nextInt(r, 1, segCount);
w.optimize(limit);
- assert w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount();
+ assert !doRandomOptimizeAssert || w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount();
}
}
switchDoDocValues();
@@ -354,6 +364,9 @@ public class RandomIndexWriter implement
if (LuceneTestCase.VERBOSE) {
System.out.println("RIW.getReader: use NRT reader");
}
+ if (r.nextInt(5) == 1) {
+ w.commit();
+ }
return w.getReader(applyDeletions);
} else {
if (LuceneTestCase.VERBOSE) {
@@ -361,7 +374,11 @@ public class RandomIndexWriter implement
}
w.commit();
switchDoDocValues();
- return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10), w.getConfig().getCodecProvider());
+ if (r.nextBoolean()) {
+ return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10), w.getConfig().getCodecProvider());
+ } else {
+ return w.getReader(applyDeletions);
+ }
}
}