You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2010/07/05 10:33:27 UTC
svn commit: r960484 [2/2] - in /lucene/dev/trunk: lucene/
lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/
lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/
lucene/contrib/instantiated/src/java/org/apa...
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java Mon Jul 5 08:33:25 2010
@@ -32,6 +32,7 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.queryParser.QueryParser; // for javadoc
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.PagedBytes;
/**
* An abstract {@link Query} that matches documents
@@ -177,11 +178,6 @@ public abstract class MultiTermQuery ext
private abstract static class BooleanQueryRewrite extends RewriteMethod {
protected final int collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException {
-
- if (query.field == null) {
- throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery.");
- }
-
final Fields fields = MultiFields.getFields(reader);
if (fields == null) {
// reader has no fields
@@ -203,10 +199,9 @@ public abstract class MultiTermQuery ext
termsEnum.attributes().addAttribute(BoostAttribute.class);
collector.boostAtt = boostAtt;
int count = 0;
- BytesRef term;
- final Term placeholderTerm = new Term(query.field);
- while ((term = termsEnum.next()) != null) {
- if (collector.collect(placeholderTerm.createTerm(term.utf8ToString()), boostAtt.getBoost())) {
+ BytesRef bytes;
+ while ((bytes = termsEnum.next()) != null) {
+ if (collector.collect(bytes, boostAtt.getBoost())) {
count++;
} else {
break;
@@ -217,15 +212,15 @@ public abstract class MultiTermQuery ext
}
protected static abstract class TermCollector {
- /** this field is only set if a boostAttribute is used (e.g. {@link FuzzyTermsEnum}) */
private BoostAttribute boostAtt = null;
/** return false to stop collecting */
- public abstract boolean collect(Term t, float boost) throws IOException;
+ public abstract boolean collect(BytesRef bytes, float boost) throws IOException;
/** set the minimum boost as a hint for the term producer */
protected final void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost) {
- if (boostAtt != null) boostAtt.setMaxNonCompetitiveBoost(maxNonCompetitiveBoost);
+ assert boostAtt != null;
+ boostAtt.setMaxNonCompetitiveBoost(maxNonCompetitiveBoost);
}
}
}
@@ -234,9 +229,11 @@ public abstract class MultiTermQuery ext
@Override
public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
final BooleanQuery result = new BooleanQuery(true);
+ final Term placeholderTerm = new Term(query.field);
query.incTotalNumberOfTerms(collectTerms(reader, query, new TermCollector() {
- public boolean collect(Term t, float boost) {
- TermQuery tq = new TermQuery(t); // found a match
+ public boolean collect(BytesRef bytes, float boost) {
+ // add new TQ, we must clone the term, else it may get overwritten!
+ TermQuery tq = new TermQuery(placeholderTerm.createTerm(new BytesRef(bytes)));
tq.setBoost(query.getBoost() * boost); // set the boost
result.add(tq, BooleanClause.Occur.SHOULD); // add to query
return true;
@@ -297,16 +294,16 @@ public abstract class MultiTermQuery ext
protected abstract Query getQuery(Term term);
@Override
- public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
+ public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
final int maxSize = Math.min(size, BooleanQuery.getMaxClauseCount());
final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
collectTerms(reader, query, new TermCollector() {
- public boolean collect(Term t, float boost) {
+ public boolean collect(BytesRef bytes, float boost) {
// ignore uncompetitive hits
if (stQueue.size() >= maxSize && boost <= stQueue.peek().boost)
return true;
- // add new entry in PQ
- st.term = t;
+ // add new entry in PQ, we must clone the term, else it may get overwritten!
+ st.bytes.copy(bytes);
st.boost = boost;
stQueue.offer(st);
// possibly drop entries from queue
@@ -319,9 +316,11 @@ public abstract class MultiTermQuery ext
private ScoreTerm st = new ScoreTerm();
});
+ final Term placeholderTerm = new Term(query.field);
final BooleanQuery bq = new BooleanQuery(true);
for (final ScoreTerm st : stQueue) {
- Query tq = getQuery(st.term); // found a match
+ // add new query; st.bytes is the ScoreTerm's own copy, made in collect()
+ Query tq = getQuery(placeholderTerm.createTerm(st.bytes));
tq.setBoost(query.getBoost() * st.boost); // set the boost
bq.add(tq, BooleanClause.Occur.SHOULD); // add to query
}
@@ -348,12 +347,13 @@ public abstract class MultiTermQuery ext
}
private static class ScoreTerm implements Comparable<ScoreTerm> {
- public Term term;
+ public final BytesRef bytes = new BytesRef();
public float boost;
public int compareTo(ScoreTerm other) {
if (this.boost == other.boost)
- return other.term.compareTo(this.term);
+ // TODO: is it OK to use default compare here?
+ return other.bytes.compareTo(this.bytes);
else
return Float.compare(this.boost, other.boost);
}
@@ -530,58 +530,67 @@ public abstract class MultiTermQuery ext
final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc());
final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff);
- final CutOffTermCollector col = new CutOffTermCollector(reader, docCountCutoff, termCountLimit);
+ final CutOffTermCollector col = new CutOffTermCollector(reader, query.field, docCountCutoff, termCountLimit);
collectTerms(reader, query, col);
if (col.hasCutOff) {
return CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
+ } else if (col.termCount == 0) {
+ return new BooleanQuery(true);
} else {
- final Query result;
- if (col.pendingTerms.isEmpty()) {
- result = new BooleanQuery(true);
- } else {
- BooleanQuery bq = new BooleanQuery(true);
- for(Term term : col.pendingTerms) {
- TermQuery tq = new TermQuery(term);
- bq.add(tq, BooleanClause.Occur.SHOULD);
+ final PagedBytes.Reader bytesReader = col.pendingTerms.freeze(false);
+ try {
+ final BooleanQuery bq = new BooleanQuery(true);
+ final Term placeholderTerm = new Term(query.field);
+ long start = col.startOffset;
+ for(int i = 0; i < col.termCount; i++) {
+ final BytesRef bytes = new BytesRef();
+ start = bytesReader.fillUsingLengthPrefix3(bytes, start);
+ bq.add(new TermQuery(placeholderTerm.createTerm(bytes)), BooleanClause.Occur.SHOULD);
}
// Strip scores
- result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
+ final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
result.setBoost(query.getBoost());
+ query.incTotalNumberOfTerms(col.termCount);
+ return result;
+ } finally {
+ bytesReader.close();
}
- query.incTotalNumberOfTerms(col.pendingTerms.size());
- return result;
}
}
private static final class CutOffTermCollector extends TermCollector {
- CutOffTermCollector(IndexReader reader, int docCountCutoff, int termCountLimit) {
+ CutOffTermCollector(IndexReader reader, String field, int docCountCutoff, int termCountLimit) {
this.reader = reader;
+ this.field = field;
this.docCountCutoff = docCountCutoff;
this.termCountLimit = termCountLimit;
}
- public boolean collect(Term t, float boost) throws IOException {
- pendingTerms.add(t);
- if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
+ public boolean collect(BytesRef bytes, float boost) throws IOException {
+ termCount++;
+ if (termCount >= termCountLimit || docVisitCount >= docCountCutoff) {
hasCutOff = true;
return false;
}
+ pendingTerms.copyUsingLengthPrefix(bytes);
// Loading the TermInfo from the terms dict here
// should not be costly, because 1) the
// query/filter will load the TermInfo when it
// runs, and 2) the terms dict has a cache:
- // @deprecated: in 4.0 use BytesRef for collectTerms()
- docVisitCount += reader.docFreq(t);
+ docVisitCount += reader.docFreq(field, bytes);
return true;
}
int docVisitCount = 0;
boolean hasCutOff = false;
+ int termCount = 0;
final IndexReader reader;
+ final String field;
final int docCountCutoff, termCountLimit;
- final ArrayList<Term> pendingTerms = new ArrayList<Term>();
+ final PagedBytes pendingTerms = new PagedBytes(15); // max term size is 32 KiB
+ final long startOffset = pendingTerms.getPointer();
}
@Override
@@ -647,18 +656,7 @@ public abstract class MultiTermQuery ext
*/
public MultiTermQuery(final String field) {
this.field = field;
- }
-
- /**
- * Constructs a query matching terms that cannot be represented with a single
- * Term.
- * @deprecated Use {@link #MultiTermQuery(String)}, as the flex branch can
- * only work on one field per terms enum. If you override
- * {@link #getTermsEnum(IndexReader)}, you cannot use this ctor.
- */
- @Deprecated
- public MultiTermQuery() {
- this(null);
+ assert field != null;
}
/** Returns the field name for this query */
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java Mon Jul 5 08:33:25 2010
@@ -106,10 +106,6 @@ public class MultiTermQueryWrapperFilter
*/
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
- if (query.field == null) {
- throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery.");
- }
-
final Fields fields = MultiFields.getFields(reader);
if (fields == null) {
// reader has no fields
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java Mon Jul 5 08:33:25 2010
@@ -184,15 +184,14 @@ public class PhraseQuery extends Query {
final Bits delDocs = MultiFields.getDeletedDocs(reader);
for (int i = 0; i < terms.size(); i++) {
final Term t = terms.get(i);
- final BytesRef text = new BytesRef(t.text());
DocsAndPositionsEnum postingsEnum = MultiFields.getTermPositionsEnum(reader,
delDocs,
t.field(),
- text);
+ t.bytes());
// PhraseQuery on a field that did not index
// positions.
if (postingsEnum == null) {
- if (MultiFields.getTermDocsEnum(reader, delDocs, t.field(), text) != null) {
+ if (MultiFields.getTermDocsEnum(reader, delDocs, t.field(), t.bytes()) != null) {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + t.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + t.text() + ")");
} else {
@@ -200,7 +199,7 @@ public class PhraseQuery extends Query {
return null;
}
}
- postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), text), positions.get(i).intValue());
+ postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue());
}
// sort by increasing docFreq order
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixQuery.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixQuery.java Mon Jul 5 08:33:25 2010
@@ -46,7 +46,7 @@ public class PrefixQuery extends MultiTe
@Override
protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
- if (prefix.text().length() == 0) {
+ if (prefix.bytes().length == 0) {
// no prefix -- match all terms for this field:
final Terms terms = MultiFields.getTerms(reader, getField());
return (terms != null) ? terms.iterator() : TermsEnum.EMPTY;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java Mon Jul 5 08:33:25 2010
@@ -36,7 +36,7 @@ public class PrefixTermsEnum extends Fil
public PrefixTermsEnum(IndexReader reader, Term prefix) throws IOException {
super(reader, prefix.field());
- setInitialSeekTerm(prefixRef = new BytesRef(prefix.text()));
+ setInitialSeekTerm(prefixRef = prefix.bytes());
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/QueryTermVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/QueryTermVector.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/QueryTermVector.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/QueryTermVector.java Mon Jul 5 08:33:25 2010
@@ -29,14 +29,16 @@ import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.TermFreqVector;
+import org.apache.lucene.util.BytesRef;
/**
*
*
**/
public class QueryTermVector implements TermFreqVector {
- private String [] terms = new String[0];
+ private BytesRef [] terms = new BytesRef[0];
private int [] termFreqs = new int[0];
public String getField() { return null; }
@@ -45,7 +47,7 @@ public class QueryTermVector implements
*
* @param queryTerms The original list of terms from the query, can contain duplicates
*/
- public QueryTermVector(String [] queryTerms) {
+ public QueryTermVector(BytesRef [] queryTerms) {
processTerms(queryTerms);
}
@@ -56,35 +58,37 @@ public class QueryTermVector implements
TokenStream stream = analyzer.tokenStream("", new StringReader(queryString));
if (stream != null)
{
- List<String> terms = new ArrayList<String>();
+ List<BytesRef> terms = new ArrayList<BytesRef>();
try {
boolean hasMoreTokens = false;
stream.reset();
- final CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
+ final TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
hasMoreTokens = stream.incrementToken();
while (hasMoreTokens) {
- terms.add(termAtt.toString());
+ BytesRef bytes = new BytesRef();
+ termAtt.toBytesRef(bytes);
+ terms.add(bytes);
hasMoreTokens = stream.incrementToken();
}
- processTerms(terms.toArray(new String[terms.size()]));
+ processTerms(terms.toArray(new BytesRef[terms.size()]));
} catch (IOException e) {
}
}
}
}
- private void processTerms(String[] queryTerms) {
+ private void processTerms(BytesRef[] queryTerms) {
if (queryTerms != null) {
Arrays.sort(queryTerms);
- Map<String,Integer> tmpSet = new HashMap<String,Integer>(queryTerms.length);
+ Map<BytesRef,Integer> tmpSet = new HashMap<BytesRef,Integer>(queryTerms.length);
//filter out duplicates
- List<String> tmpList = new ArrayList<String>(queryTerms.length);
+ List<BytesRef> tmpList = new ArrayList<BytesRef>(queryTerms.length);
List<Integer> tmpFreqs = new ArrayList<Integer>(queryTerms.length);
int j = 0;
for (int i = 0; i < queryTerms.length; i++) {
- String term = queryTerms[i];
+ BytesRef term = queryTerms[i];
Integer position = tmpSet.get(term);
if (position == null) {
tmpSet.put(term, Integer.valueOf(j++));
@@ -112,7 +116,7 @@ public class QueryTermVector implements
sb.append('{');
for (int i=0; i<terms.length; i++) {
if (i>0) sb.append(", ");
- sb.append(terms[i]).append('/').append(termFreqs[i]);
+ sb.append(terms[i].utf8ToString()).append('/').append(termFreqs[i]);
}
sb.append('}');
return sb.toString();
@@ -123,7 +127,7 @@ public class QueryTermVector implements
return terms.length;
}
- public String[] getTerms() {
+ public BytesRef[] getTerms() {
return terms;
}
@@ -131,12 +135,12 @@ public class QueryTermVector implements
return termFreqs;
}
- public int indexOf(String term) {
+ public int indexOf(BytesRef term) {
int res = Arrays.binarySearch(terms, term);
return res >= 0 ? res : -1;
}
- public int[] indexesOf(String[] terms, int start, int len) {
+ public int[] indexesOf(BytesRef[] terms, int start, int len) {
int res[] = new int[len];
for (int i=0; i < len; i++) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java Mon Jul 5 08:33:25 2010
@@ -41,7 +41,7 @@ public final class SingleTermsEnum exten
*/
public SingleTermsEnum(IndexReader reader, Term singleTerm) throws IOException {
super(reader, singleTerm.field());
- singleRef = new BytesRef(singleTerm.text());
+ singleRef = singleTerm.bytes();
setInitialSeekTerm(singleRef);
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java Mon Jul 5 08:33:25 2010
@@ -75,7 +75,7 @@ public class TermQuery extends Query {
public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
// NOTE: debateably, the caller should never pass in a
// multi reader...
- DocsEnum docs = MultiFields.getTermDocsEnum(reader, MultiFields.getDeletedDocs(reader), term.field(), new BytesRef(term.text()));
+ DocsEnum docs = MultiFields.getTermDocsEnum(reader, MultiFields.getDeletedDocs(reader), term.field(), term.bytes());
if (docs == null) {
return null;
}
@@ -118,7 +118,7 @@ public class TermQuery extends Query {
Explanation tfExplanation = new Explanation();
int tf = 0;
- DocsEnum docs = reader.termDocsEnum(MultiFields.getDeletedDocs(reader), term.field(), new BytesRef(term.text()));
+ DocsEnum docs = reader.termDocsEnum(MultiFields.getDeletedDocs(reader), term.field(), term.bytes());
if (docs != null) {
int newDoc = docs.advance(doc);
if (newDoc == doc) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java Mon Jul 5 08:33:25 2010
@@ -85,16 +85,15 @@ public class SpanTermQuery extends SpanQ
public Spans getSpans(final IndexReader reader) throws IOException {
// NOTE: debateably, the caller should never pass in a
// multi reader...
- final BytesRef textBytes = new BytesRef(term.text());
final DocsAndPositionsEnum postings = MultiFields.getTermPositionsEnum(reader,
MultiFields.getDeletedDocs(reader),
term.field(),
- textBytes);
+ term.bytes());
if (postings != null) {
return new TermSpans(postings, term);
} else {
- if (MultiFields.getTermDocsEnum(reader, MultiFields.getDeletedDocs(reader), term.field(), textBytes) != null) {
+ if (MultiFields.getTermDocsEnum(reader, MultiFields.getDeletedDocs(reader), term.field(), term.bytes()) != null) {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + term.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run SpanTermQuery (term=" + term.text() + ")");
} else {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java Mon Jul 5 08:33:25 2010
@@ -77,6 +77,16 @@ public final class BytesRef implements C
this();
copy(text);
}
+
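+ /**
+ * Initializes the byte[] from the UTF8 bytes for a slice of
+ * the provided char array.
+ *
+ * @param text Source chars. This must be well-formed
+ * unicode text, with no unpaired surrogates or U+FFFF.
+ * @param offset Index of the first char of text to convert.
+ * @param length Number of chars to convert.
+ */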
+ public BytesRef(char text[], int offset, int length) {
+ this(length * 4);
+ copy(text, offset, length);
+ }
public BytesRef(BytesRef other) {
this();
@@ -106,6 +116,15 @@ public final class BytesRef implements C
UnicodeUtil.UTF16toUTF8(text, 0, text.length(), this);
}
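+ /**
+ * Copies the UTF8 bytes for the given slice of a char array.
+ *
+ * @param text Must be well-formed unicode text, with no
+ * unpaired surrogates or invalid UTF16 code units.
+ * @param offset Index of the first char of text to convert.
+ * @param length Number of chars to convert.
+ */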
+ public void copy(char text[], int offset, int length) {
+ UnicodeUtil.UTF16toUTF8(text, offset, length, this);
+ }
public boolean bytesEquals(BytesRef other) {
if (length == other.length) {
int otherUpto = other.offset;
@@ -277,6 +296,62 @@ public final class BytesRef implements C
}
}
+ private final static Comparator<BytesRef> utf8SortedAsUTF16SortOrder = new UTF8SortedAsUTF16Comparator();
+
+ public static Comparator<BytesRef> getUTF8SortedAsUTF16Comparator() {
+ return utf8SortedAsUTF16SortOrder;
+ }
+
+ private static class UTF8SortedAsUTF16Comparator implements Comparator<BytesRef> {
+ // Only singleton
+ private UTF8SortedAsUTF16Comparator() {};
+
+ public int compare(BytesRef a, BytesRef b) {
+
+ final byte[] aBytes = a.bytes;
+ int aUpto = a.offset;
+ final byte[] bBytes = b.bytes;
+ int bUpto = b.offset;
+
+ final int aStop;
+ if (a.length < b.length) {
+ aStop = aUpto + a.length;
+ } else {
+ aStop = aUpto + b.length;
+ }
+
+ while(aUpto < aStop) {
+ int aByte = aBytes[aUpto++] & 0xff;
+ int bByte = bBytes[bUpto++] & 0xff;
+
+ if (aByte != bByte) {
+
+ // See http://icu-project.org/docs/papers/utf16_code_point_order.html#utf-8-in-utf-16-order
+
+ // We know the terms are not equal, but, we may
+ // have to carefully fixup the bytes at the
+ // difference to match UTF16's sort order:
+ if (aByte >= 0xee && bByte >= 0xee) {
+ if ((aByte & 0xfe) == 0xee) {
+ aByte += 0x10;
+ }
+ if ((bByte&0xfe) == 0xee) {
+ bByte += 0x10;
+ }
+ }
+ return aByte - bByte;
+ }
+ }
+
+ // One is a prefix of the other, or, they are equal:
+ return a.length - b.length;
+ }
+
+ public boolean equals(Object other) {
+ return this == other;
+ }
+ }
+
public void writeExternal(ObjectOutput out)
throws IOException
{
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/PagedBytes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/PagedBytes.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/PagedBytes.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/PagedBytes.java Mon Jul 5 08:33:25 2010
@@ -125,6 +125,26 @@ public final class PagedBytes {
return index;
}
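+ /** @lucene.internal Reads length as a 1 or 2 byte prefix, starting at start: if the
+ * high bit of the first byte is clear, that byte is the length; otherwise its low
+ * 7 bits are the high byte of a 2-byte length. Fills b with the bytes after the prefix.
+ * Returns the start offset of the next part, suitable as start parameter on next call
+ * to sequentially read all BytesRefs. */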
+ public long fillUsingLengthPrefix3(BytesRef b, long start) {
+ final int index = (int) (start >> blockBits);
+ final int offset = (int) (start & blockMask);
+ final byte[] block = b.bytes = blocks[index];
+
+ if ((block[offset] & 128) == 0) {
+ b.length = block[offset];
+ b.offset = offset+1;
+ start += 1L + b.length;
+ } else {
+ b.length = (((int) (block[offset] & 0x7f)) << 8) | (block[1+offset] & 0xff);
+ b.offset = offset+2;
+ start += 2L + b.length;
+ assert b.length > 0;
+ }
+ return start;
+ }
/** @lucene.internal */
public byte[][] getBlocks() {
@@ -230,7 +250,7 @@ public final class PagedBytes {
/** Commits final byte[], trimming it if necessary and if trim=true */
public Reader freeze(boolean trim) {
- if (upto < blockSize) {
+ if (trim && upto < blockSize) {
final byte[] newBlock = new byte[upto];
System.arraycopy(currentBlock, 0, newBlock, 0, upto);
currentBlock = newBlock;
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java Mon Jul 5 08:33:25 2010
@@ -464,7 +464,7 @@ public class TestAddIndexes extends Luce
private void verifyTermDocs(Directory dir, Term term, int numDocs)
throws IOException {
IndexReader reader = IndexReader.open(dir, true);
- DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, null, term.field, new BytesRef(term.text));
+ DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, null, term.field, term.bytes);
int count = 0;
while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
count++;
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPayloads.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPayloads.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPayloads.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPayloads.java Mon Jul 5 08:33:25 2010
@@ -188,7 +188,7 @@ public class TestPayloads extends Lucene
Term[] terms = generateTerms(fieldName, numTerms);
StringBuilder sb = new StringBuilder();
for (int i = 0; i < terms.length; i++) {
- sb.append(terms[i].text);
+ sb.append(terms[i].text());
sb.append(" ");
}
String content = sb.toString();
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPositionBasedTermVectorMapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPositionBasedTermVectorMapper.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPositionBasedTermVectorMapper.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPositionBasedTermVectorMapper.java Mon Jul 5 08:33:25 2010
@@ -15,6 +15,7 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
@@ -65,7 +66,7 @@ public class TestPositionBasedTermVector
//Test single position
for (int i = 0; i < tokens.length; i++) {
String token = tokens[i];
- mapper.map(token, 1, null, thePositions[i]);
+ mapper.map(new BytesRef(token), 1, null, thePositions[i]);
}
Map<String,Map<Integer,PositionBasedTermVectorMapper.TVPositionInfo>> map = mapper.getFieldToTerms();
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java Mon Jul 5 08:33:25 2010
@@ -100,7 +100,7 @@ public class TestSegmentMerger extends L
TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
assertTrue(vector != null);
- String [] terms = vector.getTerms();
+ BytesRef [] terms = vector.getTerms();
assertTrue(terms != null);
//System.out.println("Terms size: " + terms.length);
assertTrue(terms.length == 3);
@@ -110,7 +110,7 @@ public class TestSegmentMerger extends L
assertTrue(vector instanceof TermPositionVector == true);
for (int i = 0; i < terms.length; i++) {
- String term = terms[i];
+ String term = terms[i].utf8ToString();
int freq = freqs[i];
//System.out.println("Term: " + term + " Freq: " + freq);
assertTrue(DocHelper.FIELD_2_TEXT.indexOf(term) != -1);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java Mon Jul 5 08:33:25 2010
@@ -192,11 +192,11 @@ public class TestSegmentReader extends L
public void testTermVectors() throws IOException {
TermFreqVector result = reader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
assertTrue(result != null);
- String [] terms = result.getTerms();
+ BytesRef [] terms = result.getTerms();
int [] freqs = result.getTermFrequencies();
assertTrue(terms != null && terms.length == 3 && freqs != null && freqs.length == 3);
for (int i = 0; i < terms.length; i++) {
- String term = terms[i];
+ String term = terms[i].utf8ToString();
int freq = freqs[i];
assertTrue(DocHelper.FIELD_2_TEXT.indexOf(term) != -1);
assertTrue(freq > 0);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java Mon Jul 5 08:33:25 2010
@@ -516,8 +516,8 @@ public class TestStressIndexing2 extends
System.out.println("v1=" + v1 + " v2=" + v2 + " i=" + i + " of " + d1.length);
assertEquals(v1.size(), v2.size());
int numTerms = v1.size();
- String[] terms1 = v1.getTerms();
- String[] terms2 = v2.getTerms();
+ BytesRef[] terms1 = v1.getTerms();
+ BytesRef[] terms2 = v2.getTerms();
int[] freq1 = v1.getTermFrequencies();
int[] freq2 = v2.getTermFrequencies();
for(int j=0;j<numTerms;j++) {
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java Mon Jul 5 08:33:25 2010
@@ -32,6 +32,7 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.MockRAMDirectory;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
public class TestTermVectorsReader extends LuceneTestCase {
@@ -170,11 +171,11 @@ public class TestTermVectorsReader exten
for (int j = 0; j < 5; j++) {
TermFreqVector vector = reader.get(j, testFields[0]);
assertTrue(vector != null);
- String[] terms = vector.getTerms();
+ BytesRef[] terms = vector.getTerms();
assertTrue(terms != null);
assertTrue(terms.length == testTerms.length);
for (int i = 0; i < terms.length; i++) {
- String term = terms[i];
+ String term = terms[i].utf8ToString();
//System.out.println("Term: " + term);
assertTrue(term.equals(testTerms[i]));
}
@@ -184,14 +185,14 @@ public class TestTermVectorsReader exten
public void testPositionReader() throws IOException {
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
TermPositionVector vector;
- String[] terms;
+ BytesRef[] terms;
vector = (TermPositionVector) reader.get(0, testFields[0]);
assertTrue(vector != null);
terms = vector.getTerms();
assertTrue(terms != null);
assertTrue(terms.length == testTerms.length);
for (int i = 0; i < terms.length; i++) {
- String term = terms[i];
+ String term = terms[i].utf8ToString();
//System.out.println("Term: " + term);
assertTrue(term.equals(testTerms[i]));
int[] positions = vector.getTermPositions(i);
@@ -217,7 +218,7 @@ public class TestTermVectorsReader exten
assertTrue(terms != null);
assertTrue(terms.length == testTerms.length);
for (int i = 0; i < terms.length; i++) {
- String term = terms[i];
+ String term = terms[i].utf8ToString();
//System.out.println("Term: " + term);
assertTrue(term.equals(testTerms[i]));
}
@@ -227,11 +228,11 @@ public class TestTermVectorsReader exten
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
TermPositionVector vector = (TermPositionVector) reader.get(0, testFields[0]);
assertTrue(vector != null);
- String[] terms = vector.getTerms();
+ BytesRef[] terms = vector.getTerms();
assertTrue(terms != null);
assertTrue(terms.length == testTerms.length);
for (int i = 0; i < terms.length; i++) {
- String term = terms[i];
+ String term = terms[i].utf8ToString();
//System.out.println("Term: " + term);
assertTrue(term.equals(testTerms[i]));
int[] positions = vector.getTermPositions(i);
@@ -413,7 +414,7 @@ public class TestTermVectorsReader exten
}
@Override
- public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+ public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
if (documentNumber == -1) {
throw new RuntimeException("Documentnumber should be set at this point!");
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TermInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TermInfosWriter.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TermInfosWriter.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TermInfosWriter.java Mon Jul 5 08:33:25 2010
@@ -76,7 +76,6 @@ final class TermInfosWriter {
private int lastFieldNumber = -1;
private TermInfosWriter other;
- private BytesRef utf8Result = new BytesRef(10);
TermInfosWriter(Directory directory, String segment, FieldInfos fis,
int interval)
@@ -106,8 +105,7 @@ final class TermInfosWriter {
}
void add(Term term, TermInfo ti) throws IOException {
- UnicodeUtil.UTF16toUTF8(term.text(), 0, term.text().length(), utf8Result);
- add(fieldInfos.fieldNumber(term.field()), utf8Result.bytes, utf8Result.length, ti);
+ add(fieldInfos.fieldNumber(term.field()), term.bytes().bytes, term.bytes().length, ti);
}
// Currently used only by assert statements
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java Mon Jul 5 08:33:25 2010
@@ -30,25 +30,6 @@ import org.junit.Test;
public class TestSurrogates extends LuceneTestCaseJ4 {
- // like Term, but uses BytesRef for text
- private static class FieldAndText implements Comparable<FieldAndText> {
- String field;
- BytesRef text;
-
- public FieldAndText(Term t) {
- field = t.field();
- text = new BytesRef(t.text());
- }
-
- public int compareTo(FieldAndText other) {
- if (other.field == field) {
- return text.compareTo(other.text);
- } else {
- return field.compareTo(other.field);
- }
- }
- }
-
// chooses from a very limited alphabet to exacerbate the
// surrogate seeking required
private static String makeDifficultRandomUnicodeString(Random r) {
@@ -76,7 +57,7 @@ public class TestSurrogates extends Luce
return new String(buffer, 0, end);
}
- private SegmentInfo makePreFlexSegment(Random r, String segName, Directory dir, FieldInfos fieldInfos, Codec codec, List<FieldAndText> fieldTerms) throws IOException {
+ private SegmentInfo makePreFlexSegment(Random r, String segName, Directory dir, FieldInfos fieldInfos, Codec codec, List<Term> fieldTerms) throws IOException {
final int numField = _TestUtil.nextInt(r, 2, 5);
@@ -110,11 +91,14 @@ public class TestSurrogates extends Luce
fieldInfos.write(dir, segName);
// sorts in UTF16 order, just like preflex:
- Collections.sort(terms);
+ Collections.sort(terms, new Comparator<Term>() {
+ public int compare(Term o1, Term o2) {
+ return o1.compareToUTF16(o2);
+ }
+ });
TermInfosWriter w = new TermInfosWriter(dir, segName, fieldInfos, 128);
TermInfo ti = new TermInfo();
- BytesRef utf8 = new BytesRef(10);
String lastText = null;
int uniqueTermCount = 0;
if (VERBOSE) {
@@ -127,23 +111,22 @@ public class TestSurrogates extends Luce
if (lastText != null && lastText.equals(text)) {
continue;
}
- fieldTerms.add(new FieldAndText(t));
+ fieldTerms.add(t);
uniqueTermCount++;
lastText = text;
- UnicodeUtil.UTF16toUTF8(text, 0, text.length(), utf8);
if (VERBOSE) {
System.out.println(" " + toHexString(t));
}
- w.add(fi.number, utf8.bytes, utf8.length, ti);
+ w.add(fi.number, t.bytes().bytes, t.bytes().length, ti);
}
w.close();
Collections.sort(fieldTerms);
if (VERBOSE) {
System.out.println("\nTEST: codepoint order");
- for(FieldAndText t: fieldTerms) {
- System.out.println(" " + t.field + ":" + UnicodeUtil.toHexString(t.text.utf8ToString()));
+ for(Term t: fieldTerms) {
+ System.out.println(" " + t.field() + ":" + toHexString(t));
}
}
@@ -166,7 +149,7 @@ public class TestSurrogates extends Luce
Random r = newRandom();
FieldInfos fieldInfos = new FieldInfos();
- List<FieldAndText> fieldTerms = new ArrayList<FieldAndText>();
+ List<Term> fieldTerms = new ArrayList<Term>();
SegmentInfo si = makePreFlexSegment(r, "_0", dir, fieldInfos, codec, fieldTerms);
// hack alert!!
@@ -188,8 +171,8 @@ public class TestSurrogates extends Luce
BytesRef text;
BytesRef lastText = null;
while((text = termsEnum.next()) != null) {
- UnicodeUtil.UTF8toUTF16(text.bytes, text.offset, text.length, utf16);
if (VERBOSE) {
+ UnicodeUtil.UTF8toUTF16(text.bytes, text.offset, text.length, utf16);
System.out.println("got term=" + field + ":" + UnicodeUtil.toHexString(new String(utf16.result, 0, utf16.length)));
System.out.println();
}
@@ -199,8 +182,8 @@ public class TestSurrogates extends Luce
assertTrue(lastText.compareTo(text) < 0);
lastText.copy(text);
}
- assertEquals(fieldTerms.get(termCount).field, field);
- assertEquals(fieldTerms.get(termCount).text, text);
+ assertEquals(fieldTerms.get(termCount).field(), field);
+ assertEquals(fieldTerms.get(termCount).bytes(), text);
termCount++;
}
if (VERBOSE) {
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java Mon Jul 5 08:33:25 2010
@@ -17,6 +17,7 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.*;
@@ -175,11 +176,11 @@ class MultiThreadTermVectorsReader imple
private void verifyVectors(TermFreqVector[] vectors, int num) {
StringBuilder temp = new StringBuilder();
- String[] terms = null;
+ BytesRef[] terms = null;
for (int i = 0; i < vectors.length; i++) {
terms = vectors[i].getTerms();
for (int z = 0; z < terms.length; z++) {
- temp.append(terms[z]);
+ temp.append(terms[z].utf8ToString());
}
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestQueryTermVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestQueryTermVector.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestQueryTermVector.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestQueryTermVector.java Mon Jul 5 08:33:25 2010
@@ -17,6 +17,7 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
@@ -28,12 +29,14 @@ public class TestQueryTermVector extends
}
public void testConstructor() {
- String [] queryTerm = {"foo", "bar", "foo", "again", "foo", "bar", "go", "go", "go"};
+ BytesRef [] queryTerm = {new BytesRef("foo"), new BytesRef("bar"), new BytesRef("foo"),
+ new BytesRef("again"), new BytesRef("foo"), new BytesRef("bar"), new BytesRef("go"),
+ new BytesRef("go"), new BytesRef("go")};
//Items are sorted lexicographically
- String [] gold = {"again", "bar", "foo", "go"};
+ BytesRef [] gold = {new BytesRef("again"), new BytesRef("bar"), new BytesRef("foo"), new BytesRef("go")};
int [] goldFreqs = {1, 2, 3, 3};
QueryTermVector result = new QueryTermVector(queryTerm);
- String [] terms = result.getTerms();
+ BytesRef [] terms = result.getTerms();
assertTrue(terms.length == 4);
int [] freq = result.getTermFrequencies();
assertTrue(freq.length == 4);
@@ -49,7 +52,7 @@ public class TestQueryTermVector extends
checkGold(terms, gold, freq, goldFreqs);
}
- private void checkGold(String[] terms, String[] gold, int[] freq, int[] goldFreqs) {
+ private void checkGold(BytesRef[] terms, BytesRef[] gold, int[] freq, int[] goldFreqs) {
for (int i = 0; i < terms.length; i++) {
assertTrue(terms[i].equals(gold[i]));
assertTrue(freq[i] == goldFreqs[i]);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermVectors.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermVectors.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermVectors.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermVectors.java Mon Jul 5 08:33:25 2010
@@ -17,6 +17,7 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
@@ -123,11 +124,11 @@ public class TestTermVectors extends Luc
for(int i=0;i<v.length;i++) {
TermPositionVector posVec = (TermPositionVector) v[i];
assertEquals(expectedFields[i], posVec.getField());
- String[] terms = posVec.getTerms();
+ BytesRef[] terms = posVec.getTerms();
assertEquals(3, terms.length);
- assertEquals("content", terms[0]);
- assertEquals("here", terms[1]);
- assertEquals("some", terms[2]);
+ assertEquals("content", terms[0].utf8ToString());
+ assertEquals("here", terms[1].utf8ToString());
+ assertEquals("some", terms[2].utf8ToString());
for(int j=0;j<3;j++) {
int[] positions = posVec.getTermPositions(j);
assertEquals(1, positions.length);
@@ -156,7 +157,7 @@ public class TestTermVectors extends Luc
if(shouldBePosVector || shouldBeOffVector){
TermPositionVector posVec = (TermPositionVector)vector[0];
- String [] terms = posVec.getTerms();
+ BytesRef [] terms = posVec.getTerms();
assertTrue(terms != null && terms.length > 0);
for (int j = 0; j < terms.length; j++) {
@@ -184,7 +185,7 @@ public class TestTermVectors extends Luc
}
catch(ClassCastException ignore){
TermFreqVector freqVec = vector[0];
- String [] terms = freqVec.getTerms();
+ BytesRef [] terms = freqVec.getTerms();
assertTrue(terms != null && terms.length > 0);
}
@@ -277,11 +278,11 @@ public class TestTermVectors extends Luc
//float coord = sim.coord()
//System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
assertTrue(vector != null);
- String[] vTerms = vector.getTerms();
+ BytesRef[] vTerms = vector.getTerms();
int [] freqs = vector.getTermFrequencies();
for (int i = 0; i < vTerms.length; i++)
{
- if (text.equals(vTerms[i]))
+ if (text.equals(vTerms[i].utf8ToString()))
{
assertTrue(freqs[i] == freq);
}
@@ -306,11 +307,11 @@ public class TestTermVectors extends Luc
TermFreqVector vector = knownSearcher.reader.getTermFreqVector(hits[1].doc, "field");
assertTrue(vector != null);
//System.out.println("Vector: " + vector);
- String[] terms = vector.getTerms();
+ BytesRef[] terms = vector.getTerms();
int [] freqs = vector.getTermFrequencies();
assertTrue(terms != null && terms.length == 10);
for (int i = 0; i < terms.length; i++) {
- String term = terms[i];
+ String term = terms[i].utf8ToString();
//System.out.println("Term: " + term);
int freq = freqs[i];
assertTrue(test4.indexOf(term) != -1);
@@ -327,7 +328,7 @@ public class TestTermVectors extends Luc
if (tve != null && last != null)
{
assertTrue("terms are not properly sorted", last.getFrequency() >= tve.getFrequency());
- Integer expectedFreq = test4Map.get(tve.getTerm());
+ Integer expectedFreq = test4Map.get(tve.getTerm().utf8ToString());
//we expect double the expectedFreq, since there are two fields with the exact same text and we are collapsing all fields
assertTrue("Frequency is not correct:", tve.getFrequency() == 2*expectedFreq.intValue());
}
@@ -421,9 +422,9 @@ public class TestTermVectors extends Luc
assertTrue(vector.length == 1);
TermPositionVector tfv = (TermPositionVector) vector[0];
assertTrue(tfv.getField().equals("field"));
- String[] terms = tfv.getTerms();
+ BytesRef[] terms = tfv.getTerms();
assertEquals(1, terms.length);
- assertEquals(terms[0], "one");
+ assertEquals(terms[0].utf8ToString(), "one");
assertEquals(5, tfv.getTermFrequencies()[0]);
int[] positions = tfv.getTermPositions(0);
@@ -447,7 +448,7 @@ public class TestTermVectors extends Luc
}
@Override
- public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+ public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
}
}
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java Mon Jul 5 08:33:25 2010
@@ -265,7 +265,7 @@ public class LukeRequestHandler extends
if( v != null ) {
SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
for( int i=0; i<v.size(); i++ ) {
- tfv.add( v.getTerms()[i], v.getTermFrequencies()[i] );
+ tfv.add( v.getTerms()[i].utf8ToString(), v.getTermFrequencies()[i] );
}
f.add( "termVector", tfv );
}
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java Mon Jul 5 08:33:25 2010
@@ -292,9 +292,9 @@ public class TermVectorComponent extends
this.reader = reader;
}
- public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+ public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
NamedList termInfo = new NamedList();
- fieldNL.add(term, termInfo);
+ fieldNL.add(term.utf8ToString(), termInfo);
if (fieldOptions.termFreq == true) {
termInfo.add("tf", frequency);
}
@@ -323,14 +323,14 @@ public class TermVectorComponent extends
}
}
- private int getDocFreq(String term) {
+ private int getDocFreq(BytesRef term) {
int result = 1;
currentTerm = currentTerm.createTerm(term);
try {
Terms terms = MultiFields.getTerms(reader, currentTerm.field());
if (terms != null) {
TermsEnum termsEnum = terms.iterator();
- if (termsEnum.seek(new BytesRef(term)) == TermsEnum.SeekStatus.FOUND) {
+ if (termsEnum.seek(term) == TermsEnum.SeekStatus.FOUND) {
result = termsEnum.docFreq();
}
}
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java Mon Jul 5 08:33:25 2010
@@ -256,7 +256,7 @@ public class UnInvertedField {
deState.termsEnum = te.tenum;
deState.reuse = te.docsEnum;
}
- DocSet set = searcher.getDocSet(new TermQuery(new Term(ti.field, topTerm.term.utf8ToString())), deState);
+ DocSet set = searcher.getDocSet(new TermQuery(new Term(ti.field, topTerm.term)), deState);
te.docsEnum = deState.reuse;
maxTermCounts[termNum] = set.size();
@@ -514,7 +514,7 @@ public class UnInvertedField {
for (TopTerm tt : bigTerms.values()) {
// TODO: counts could be deferred if sorted==false
if (tt.termNum >= startTerm && tt.termNum < endTerm) {
- counts[tt.termNum] = searcher.numDocs(new TermQuery(new Term(ti.field, tt.term.utf8ToString())), docs);
+ counts[tt.termNum] = searcher.numDocs(new TermQuery(new Term(ti.field, tt.term)), docs);
}
}
@@ -712,7 +712,7 @@ public class UnInvertedField {
for (TopTerm tt : bigTerms.values()) {
// TODO: counts could be deferred if sorted==false
if (tt.termNum >= 0 && tt.termNum < numTermsInField) {
- final Term t = new Term(ti.field, tt.term.utf8ToString());
+ final Term t = new Term(ti.field, tt.term);
if (finfo.length == 0) {
counts[tt.termNum] = searcher.numDocs(new TermQuery(t), docs);
} else {
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java Mon Jul 5 08:33:25 2010
@@ -480,7 +480,7 @@ public class SolrIndexSearcher extends I
if (fields == null) return -1;
Terms terms = fields.terms(t.field());
if (terms == null) return -1;
- BytesRef termBytes = new BytesRef(t.text());
+ BytesRef termBytes = t.bytes();
DocsEnum docs = terms.docs(MultiFields.getDeletedDocs(reader), termBytes, null);
if (docs == null) return -1;
int id = docs.nextDoc();
@@ -754,7 +754,7 @@ public class SolrIndexSearcher extends I
Fields fields = sir.fields();
Terms terms = fields.terms(t.field());
- BytesRef termBytes = new BytesRef(t.text());
+ BytesRef termBytes = t.bytes();
Bits skipDocs = sir.getDeletedDocs();
DocsEnum docsEnum = terms==null ? null : terms.docs(skipDocs, termBytes, null);
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/update/DirectUpdateHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/update/DirectUpdateHandler.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/update/DirectUpdateHandler.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/update/DirectUpdateHandler.java Mon Jul 5 08:33:25 2010
@@ -118,7 +118,7 @@ public class DirectUpdateHandler extends
DocsEnum tdocs = MultiFields.getTermDocsEnum(ir,
MultiFields.getDeletedDocs(ir),
idTerm.field(),
- new BytesRef(idTerm.text()));
+ idTerm.bytes());
if (tdocs != null) {
return tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS;
} else {