You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/21 20:58:44 UTC
svn commit: r1534320 [21/39] - in /lucene/dev/branches/lucene4956: ./
dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/lucene/expressions/
dev-tools/idea/solr/contrib/velocity/ dev-tools/maven/
dev-tools/maven/lucene/ dev-tools/maven/lucene/expressions/...
Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java Mon Oct 21 18:58:24 2013
@@ -16,8 +16,14 @@ package org.apache.lucene.search.vectorh
* limitations under the License.
*/
+import java.util.LinkedList;
+
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
+import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
+import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
+import org.apache.lucene.util._TestUtil;
public class FieldPhraseListTest extends AbstractTestCase {
@@ -120,31 +126,7 @@ public class FieldPhraseListTest extends
assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() );
assertEquals( 9, fpl.phraseList.get( 0 ).getEndOffset() );
}
-
- public void testProximityPhraseReverse() throws Exception {
- make1d1fIndex( "z a a b c" );
-
- FieldQuery fq = new FieldQuery( pqF( 2F, 3, "c", "a" ), true, true );
- FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
- FieldPhraseList fpl = new FieldPhraseList( stack, fq );
- assertEquals( 1, fpl.phraseList.size() );
- assertEquals( "ac(2.0)((4,5)(8,9))", fpl.phraseList.get( 0 ).toString() );
- assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() );
- assertEquals( 9, fpl.phraseList.get( 0 ).getEndOffset() );
- }
-
- public void testProximityPhraseWithRepeatedTerms() throws Exception {
- make1d1fIndex( "z a a b b z d" );
-
- FieldQuery fq = new FieldQuery( pqF( 2F, 2, "a", "b", "d" ), true, true );
- FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
- FieldPhraseList fpl = new FieldPhraseList( stack, fq );
- assertEquals( 1, fpl.phraseList.size() );
- assertEquals( "abd(2.0)((4,7)(12,13))", fpl.phraseList.get( 0 ).toString() );
- assertEquals( 4, fpl.phraseList.get( 0 ).getStartOffset() );
- assertEquals( 13, fpl.phraseList.get( 0 ).getEndOffset() );
- }
-
+
public void test2PhrasesOverlap() throws Exception {
make1d1fIndex( "d a b c d" );
@@ -212,7 +194,7 @@ public class FieldPhraseListTest extends
assertEquals( 1, fpl.phraseList.size() );
assertEquals( "sppeeeed(1.0)((88,93))", fpl.phraseList.get( 0 ).toString() );
}
-
+
/* This test shows a big speedup from limiting the number of analyzed phrases in
* this bad case for FieldPhraseList */
/* But it is not reliable as a unit test since it is timing-dependent
@@ -242,4 +224,68 @@ public class FieldPhraseListTest extends
assertEquals( "a(1.0)((0,1))", fpl.phraseList.get( 0 ).toString() );
}
*/
+
+ public void testWeightedPhraseInfoComparisonConsistency() {
+ WeightedPhraseInfo a = newInfo( 0, 0, 1 );
+ WeightedPhraseInfo b = newInfo( 1, 2, 1 );
+ WeightedPhraseInfo c = newInfo( 2, 3, 1 );
+ WeightedPhraseInfo d = newInfo( 0, 0, 1 );
+ WeightedPhraseInfo e = newInfo( 0, 0, 2 );
+
+ assertConsistentEquals( a, a );
+ assertConsistentEquals( b, b );
+ assertConsistentEquals( c, c );
+ assertConsistentEquals( d, d );
+ assertConsistentEquals( e, e );
+ assertConsistentEquals( a, d );
+ assertConsistentLessThan( a, b );
+ assertConsistentLessThan( b, c );
+ assertConsistentLessThan( a, c );
+ assertConsistentLessThan( a, e );
+ assertConsistentLessThan( e, b );
+ assertConsistentLessThan( e, c );
+ assertConsistentLessThan( d, b );
+ assertConsistentLessThan( d, c );
+ assertConsistentLessThan( d, e );
+ }
+
+ public void testToffsComparisonConsistency() {
+ Toffs a = new Toffs( 0, 0 );
+ Toffs b = new Toffs( 1, 2 );
+ Toffs c = new Toffs( 2, 3 );
+ Toffs d = new Toffs( 0, 0 );
+
+ assertConsistentEquals( a, a );
+ assertConsistentEquals( b, b );
+ assertConsistentEquals( c, c );
+ assertConsistentEquals( d, d );
+ assertConsistentEquals( a, d );
+ assertConsistentLessThan( a, b );
+ assertConsistentLessThan( b, c );
+ assertConsistentLessThan( a, c );
+ assertConsistentLessThan( d, b );
+ assertConsistentLessThan( d, c );
+ }
+
+ private WeightedPhraseInfo newInfo( int startOffset, int endOffset, float boost ) {
+ LinkedList< TermInfo > infos = new LinkedList< TermInfo >();
+ infos.add( new TermInfo( _TestUtil.randomUnicodeString( random() ), startOffset, endOffset, 0, 0 ) );
+ return new WeightedPhraseInfo( infos, boost );
+ }
+
+ private < T extends Comparable< T > > void assertConsistentEquals( T a, T b ) {
+ assertEquals( a, b );
+ assertEquals( b, a );
+ assertEquals( a.hashCode(), b.hashCode() );
+ assertEquals( 0, a.compareTo( b ) );
+ assertEquals( 0, b.compareTo( a ) );
+ }
+
+ private < T extends Comparable< T > > void assertConsistentLessThan( T a, T b ) {
+ assertFalse( a.equals( b ) );
+ assertFalse( b.equals( a ) );
+ assertFalse( a.hashCode() == b.hashCode() );
+ assertTrue( a.compareTo( b ) < 0 );
+ assertTrue( b.compareTo( a ) > 0 );
+ }
}
Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java Mon Oct 21 18:58:24 2013
@@ -863,8 +863,8 @@ public class FieldQueryTest extends Abst
phraseCandidate.add( new TermInfo( "c", 4, 5, 4, 1 ) );
assertNull( fq.searchPhrase( F, phraseCandidate ) );
- // "a b c"~2
- query = pqF( 1F, 2, "a", "b", "c" );
+ // "a b c"~1
+ query = pqF( 1F, 1, "a", "b", "c" );
// phraseHighlight = true, fieldMatch = true
fq = new FieldQuery( query, true, true );
Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java Mon Oct 21 18:58:24 2013
@@ -20,6 +20,8 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
+import org.apache.lucene.util._TestUtil;
public class FieldTermStackTest extends AbstractTestCase {
@@ -173,4 +175,37 @@ public class FieldTermStackTest extends
assertEquals ("the(195,198,31)", stack.pop().toString());
}
+ public void testTermInfoComparisonConsistency() {
+ TermInfo a = new TermInfo( _TestUtil.randomUnicodeString( random() ), 0, 0, 0, 1 );
+ TermInfo b = new TermInfo( _TestUtil.randomUnicodeString( random() ), 0, 0, 1, 1 );
+ TermInfo c = new TermInfo( _TestUtil.randomUnicodeString( random() ), 0, 0, 2, 1 );
+ TermInfo d = new TermInfo( _TestUtil.randomUnicodeString( random() ), 0, 0, 0, 1 );
+
+ assertConsistentEquals( a, a );
+ assertConsistentEquals( b, b );
+ assertConsistentEquals( c, c );
+ assertConsistentEquals( d, d );
+ assertConsistentEquals( a, d );
+ assertConsistentLessThan( a, b );
+ assertConsistentLessThan( b, c );
+ assertConsistentLessThan( a, c );
+ assertConsistentLessThan( d, b );
+ assertConsistentLessThan( d, c );
+ }
+
+ private < T extends Comparable< T > > void assertConsistentEquals( T a, T b ) {
+ assertEquals( a, b );
+ assertEquals( b, a );
+ assertEquals( a.hashCode(), b.hashCode() );
+ assertEquals( 0, a.compareTo( b ) );
+ assertEquals( 0, b.compareTo( a ) );
+ }
+
+ private < T extends Comparable< T > > void assertConsistentLessThan( T a, T b ) {
+ assertFalse( a.equals( b ) );
+ assertFalse( b.equals( a ) );
+ assertFalse( a.hashCode() == b.hashCode() );
+ assertTrue( a.compareTo( b ) < 0 );
+ assertTrue( b.compareTo( a ) > 0 );
+ }
}
Modified: lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java Mon Oct 21 18:58:24 2013
@@ -85,7 +85,7 @@ public class SimpleFragmentsBuilderTest
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
String[] preTags = { "[" };
String[] postTags = { "]" };
- assertEquals( "<h1> [a] </h1>",
+ assertEquals( "<h1> [a] </h1>",
sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
}
Modified: lucene/dev/branches/lucene4956/lucene/ivy-settings.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/ivy-settings.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/ivy-settings.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/ivy-settings.xml Mon Oct 21 18:58:24 2013
@@ -21,6 +21,8 @@
<property name="local-maven2-dir" value="${user.home}/.m2/repository/" />
+ <properties file="${ivy.settings.dir}/ivy-versions.properties" override="false"/>
+
<include url="${ivy.default.settings.dir}/ivysettings-public.xml"/>
<include url="${ivy.default.settings.dir}/ivysettings-shared.xml"/>
<include url="${ivy.default.settings.dir}/ivysettings-local.xml"/>
@@ -33,7 +35,10 @@
<ibiblio name="maven.restlet.org" root="http://maven.restlet.org" m2compatible="true" />
<!-- you might need to tweak this from china so it works -->
- <ibiblio name="working-chinese-mirror" root="http://mirror.netcologne.de/maven2" m2compatible="true" />
+ <ibiblio name="working-chinese-mirror" root="http://uk.maven.org/maven2" m2compatible="true" />
+
+ <!-- temporary to try Clover 3.2.0 snapshots, see https://issues.apache.org/jira/browse/LUCENE-5243, https://jira.atlassian.com/browse/CLOV-1368 -->
+ <ibiblio name="atlassian-clover-snapshots" root="https://maven.atlassian.com/content/repositories/atlassian-public-snapshot" m2compatible="true" />
<!--
<filesystem name="local-maven-2" m2compatible="true" local="true">
@@ -50,6 +55,7 @@
<resolver ref="main"/>
<resolver ref="sonatype-releases" />
<resolver ref="maven.restlet.org" />
+ <resolver ref="atlassian-clover-snapshots" />
<resolver ref="working-chinese-mirror" />
</chain>
</resolvers>
Modified: lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java (original)
+++ lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java Mon Oct 21 18:58:24 2013
@@ -109,7 +109,7 @@ abstract class TermsCollector extends Co
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
- fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field);
+ fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field, false);
}
}
Modified: lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java (original)
+++ lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java Mon Oct 21 18:58:24 2013
@@ -129,7 +129,7 @@ abstract class TermsWithScoreCollector e
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
- fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field);
+ fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field, false);
}
static class Avg extends SV {
Modified: lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java (original)
+++ lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java Mon Oct 21 18:58:24 2013
@@ -64,7 +64,8 @@ public class ToChildBlockJoinQuery exten
*
* @param parentQuery Query that matches parent documents
* @param parentsFilter Filter (must produce FixedBitSet
- * per-segment) identifying the parent documents.
+ * per-segment, like {@link FixedBitSetCachingWrapperFilter})
+ * identifying the parent documents.
* @param doScores true if parent scores should be calculated
*/
public ToChildBlockJoinQuery(Query parentQuery, Filter parentsFilter, boolean doScores) {
Modified: lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java (original)
+++ lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java Mon Oct 21 18:58:24 2013
@@ -283,6 +283,7 @@ public class ToParentBlockJoinCollector
}
private void enroll(ToParentBlockJoinQuery query, ToParentBlockJoinQuery.BlockJoinScorer scorer) {
+ scorer.trackPendingChildHits();
final Integer slot = joinQueryID.get(query);
if (slot == null) {
joinQueryID.put(query, joinScorers.length);
Modified: lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (original)
+++ lucene/dev/branches/lucene4956/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java Mon Oct 21 18:58:24 2013
@@ -17,15 +17,9 @@ package org.apache.lucene.search.join;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Locale;
-import java.util.Set;
-
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter; // javadocs
+import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.DocIdSet;
@@ -41,6 +35,12 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Locale;
+import java.util.Set;
+
/**
* This query requires that you index
* children and parent docs as a single block, using the
@@ -100,7 +100,8 @@ public class ToParentBlockJoinQuery exte
*
* @param childQuery Query matching child documents.
* @param parentsFilter Filter (must produce FixedBitSet
- * per-segment) identifying the parent documents.
+ * per-segment, like {@link FixedBitSetCachingWrapperFilter})
+ * identifying the parent documents.
* @param scoreMode How to aggregate multiple child scores
* into a single parent score.
**/
@@ -217,8 +218,7 @@ public class ToParentBlockJoinQuery exte
private float parentScore;
private int parentFreq;
private int nextChildDoc;
-
- private int[] pendingChildDocs = new int[5];
+ private int[] pendingChildDocs;
private float[] pendingChildScores;
private int childDocUpto;
@@ -229,9 +229,6 @@ public class ToParentBlockJoinQuery exte
this.childScorer = childScorer;
this.scoreMode = scoreMode;
this.acceptDocs = acceptDocs;
- if (scoreMode != ScoreMode.None) {
- pendingChildScores = new float[5];
- }
nextChildDoc = firstChildDoc;
}
@@ -320,18 +317,22 @@ public class ToParentBlockJoinQuery exte
do {
//System.out.println(" c=" + nextChildDoc);
- if (pendingChildDocs.length == childDocUpto) {
+ if (pendingChildDocs != null && pendingChildDocs.length == childDocUpto) {
pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
}
- if (scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) {
+ if (pendingChildScores != null && scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) {
pendingChildScores = ArrayUtil.grow(pendingChildScores);
}
- pendingChildDocs[childDocUpto] = nextChildDoc;
+ if (pendingChildDocs != null) {
+ pendingChildDocs[childDocUpto] = nextChildDoc;
+ }
if (scoreMode != ScoreMode.None) {
// TODO: specialize this into dedicated classes per-scoreMode
final float childScore = childScorer.score();
final int childFreq = childScorer.freq();
- pendingChildScores[childDocUpto] = childScore;
+ if (pendingChildScores != null) {
+ pendingChildScores[childDocUpto] = childScore;
+ }
maxScore = Math.max(childScore, maxScore);
totalScore += childScore;
parentFreq += childFreq;
@@ -431,6 +432,16 @@ public class ToParentBlockJoinQuery exte
public long cost() {
return childScorer.cost();
}
+
+ /**
+ * Instructs this scorer to keep track of the child docIds and (unless the score mode is None) the child scores for retrieval purposes.
+ */
+ public void trackPendingChildHits() {
+ pendingChildDocs = new int[5];
+ if (scoreMode != ScoreMode.None) {
+ pendingChildScores = new float[5];
+ }
+ }
}
@Override
Modified: lucene/dev/branches/lucene4956/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java (original)
+++ lucene/dev/branches/lucene4956/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java Mon Oct 21 18:58:24 2013
@@ -93,7 +93,7 @@ public class TestBlockJoin extends Lucen
w.close();
assertTrue(r.leaves().size() > 1);
IndexSearcher s = new IndexSearcher(r);
- Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
BooleanQuery childQuery = new BooleanQuery();
childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
@@ -145,7 +145,7 @@ public class TestBlockJoin extends Lucen
IndexSearcher s = newSearcher(r);
// Create a filter that defines "parent" documents in the index - in this case resumes
- Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
// Define child document criteria (finds an example of relevant work experience)
BooleanQuery childQuery = new BooleanQuery();
@@ -249,7 +249,7 @@ public class TestBlockJoin extends Lucen
IndexSearcher s = newSearcher(r);
// Create a filter that defines "parent" documents in the index - in this case resumes
- Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
// Define child document criteria (finds an example of relevant work experience)
BooleanQuery childQuery = new BooleanQuery();
@@ -269,7 +269,7 @@ public class TestBlockJoin extends Lucen
assertEquals("dummy filter passes everyone ", 2, s.search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))), 10).totalHits);
// not found test
- assertEquals("noone live there", 0, s.search(childJoinQuery, new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("country", "Oz")))), 1).totalHits);
+ assertEquals("noone live there", 0, s.search(childJoinQuery, new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("country", "Oz")))), 1).totalHits);
assertEquals("noone live there", 0, s.search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "Oz"))), 1).totalHits);
// apply the UK filter by the searcher
@@ -362,7 +362,7 @@ public class TestBlockJoin extends Lucen
ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(
NumericRangeQuery.newIntRange("year", 1990, 2010, true, true),
- new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))),
+ new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))),
ScoreMode.Total
);
@@ -565,7 +565,7 @@ public class TestBlockJoin extends Lucen
final IndexSearcher joinS = new IndexSearcher(joinR);
- final Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "x"))));
+ final Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "x"))));
final int iters = 200*RANDOM_MULTIPLIER;
@@ -831,7 +831,7 @@ public class TestBlockJoin extends Lucen
childJoinQuery2 = parentJoinQuery2;
final Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
childJoinFilter2 = random().nextBoolean()
- ? new CachingWrapperFilter(f): f;
+ ? new FixedBitSetCachingWrapperFilter(f): f;
} else {
childJoinFilter2 = null;
// AND child field w/ parent query:
@@ -852,7 +852,7 @@ public class TestBlockJoin extends Lucen
childQuery2 = parentQuery2;
final Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
childFilter2 = random().nextBoolean()
- ? new CachingWrapperFilter(f): f;
+ ? new FixedBitSetCachingWrapperFilter(f): f;
} else {
childFilter2 = null;
final BooleanQuery bq2 = new BooleanQuery();
@@ -991,7 +991,7 @@ public class TestBlockJoin extends Lucen
IndexSearcher s = newSearcher(r);
// Create a filter that defines "parent" documents in the index - in this case resumes
- Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
// Define child document criteria (finds an example of relevant work experience)
BooleanQuery childJobQuery = new BooleanQuery();
@@ -1072,7 +1072,7 @@ public class TestBlockJoin extends Lucen
w.close();
IndexSearcher s = newSearcher(r);
Query tq = new TermQuery(new Term("child", "1"));
- Filter parentFilter = new CachingWrapperFilter(
+ Filter parentFilter = new FixedBitSetCachingWrapperFilter(
new QueryWrapperFilter(
new TermQuery(new Term("parent", "1"))));
@@ -1106,7 +1106,7 @@ public class TestBlockJoin extends Lucen
w.close();
IndexSearcher s = newSearcher(r);
Query tq = new TermQuery(new Term("child", "2"));
- Filter parentFilter = new CachingWrapperFilter(
+ Filter parentFilter = new FixedBitSetCachingWrapperFilter(
new QueryWrapperFilter(
new TermQuery(new Term("isparent", "yes"))));
@@ -1140,7 +1140,7 @@ public class TestBlockJoin extends Lucen
IndexSearcher s = new IndexSearcher(r);
// Create a filter that defines "parent" documents in the index - in this case resumes
- Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
// Define child document criteria (finds an example of relevant work experience)
BooleanQuery childQuery = new BooleanQuery();
@@ -1244,7 +1244,7 @@ public class TestBlockJoin extends Lucen
w.close();
Query childQuery = new TermQuery(new Term("childText", "text"));
- Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
BooleanQuery parentQuery = new BooleanQuery();
parentQuery.add(childJoinQuery, Occur.SHOULD);
@@ -1310,7 +1310,7 @@ public class TestBlockJoin extends Lucen
// never matches:
Query childQuery = new TermQuery(new Term("childText", "bogus"));
- Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
BooleanQuery parentQuery = new BooleanQuery();
parentQuery.add(childJoinQuery, Occur.SHOULD);
@@ -1376,7 +1376,7 @@ public class TestBlockJoin extends Lucen
// illegally matches parent:
Query childQuery = new TermQuery(new Term("parentText", "text"));
- Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
+ Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
BooleanQuery parentQuery = new BooleanQuery();
parentQuery.add(childJoinQuery, Occur.SHOULD);
Modified: lucene/dev/branches/lucene4956/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinSorting.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinSorting.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinSorting.java (original)
+++ lucene/dev/branches/lucene4956/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinSorting.java Mon Oct 21 18:58:24 2013
@@ -214,7 +214,7 @@ public class TestBlockJoinSorting extend
Filter childFilter = new QueryWrapperFilter(new PrefixQuery(new Term("field2")));
ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(
new FilteredQuery(new MatchAllDocsQuery(), childFilter),
- new CachingWrapperFilter(parentFilter),
+ new FixedBitSetCachingWrapperFilter(parentFilter),
ScoreMode.None
);
@@ -279,7 +279,7 @@ public class TestBlockJoinSorting extend
childFilter = new QueryWrapperFilter(new TermQuery((new Term("filter_1", "T"))));
query = new ToParentBlockJoinQuery(
new FilteredQuery(new MatchAllDocsQuery(), childFilter),
- new CachingWrapperFilter(parentFilter),
+ new FixedBitSetCachingWrapperFilter(parentFilter),
ScoreMode.None
);
sortField = new ToParentBlockJoinSortField(
@@ -305,7 +305,7 @@ public class TestBlockJoinSorting extend
}
private Filter wrap(Filter filter) {
- return random().nextBoolean() ? new CachingWrapperFilter(filter) : filter;
+ return random().nextBoolean() ? new FixedBitSetCachingWrapperFilter(filter) : filter;
}
}
Modified: lucene/dev/branches/lucene4956/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java Mon Oct 21 18:58:24 2013
@@ -47,6 +47,7 @@ import org.apache.lucene.search.TermQuer
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
@@ -511,13 +512,14 @@ public class TestJoinUtil extends Lucene
private Scorer scorer;
private BinaryDocValues terms;
+ private Bits docsWithField;
private final BytesRef spare = new BytesRef();
@Override
public void collect(int doc) throws IOException {
terms.get(doc, spare);
BytesRef joinValue = spare;
- if (joinValue.bytes == BinaryDocValues.MISSING) {
+ if (joinValue.length == 0 && !docsWithField.get(doc)) {
return;
}
@@ -530,7 +532,8 @@ public class TestJoinUtil extends Lucene
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
- terms = FieldCache.DEFAULT.getTerms(context.reader(), fromField);
+ terms = FieldCache.DEFAULT.getTerms(context.reader(), fromField, true);
+ docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), fromField);
}
@Override
@@ -628,7 +631,7 @@ public class TestJoinUtil extends Lucene
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
- terms = FieldCache.DEFAULT.getTerms(context.reader(), toField);
+ terms = FieldCache.DEFAULT.getTerms(context.reader(), toField, false);
docBase = context.docBase;
}
Modified: lucene/dev/branches/lucene4956/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/lucene4956/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Mon Oct 21 18:58:24 2013
@@ -756,6 +756,11 @@ public class MemoryIndex {
return null;
}
+ @Override
+ public Bits getDocsWithField(String field) throws IOException {
+ return null;
+ }
+
private class MemoryFields extends Fields {
@Override
public Iterator<String> iterator() {
@@ -799,11 +804,6 @@ public class MemoryIndex {
}
@Override
- public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
- }
-
- @Override
public long size() {
return info.terms.size();
}
@@ -825,6 +825,11 @@ public class MemoryIndex {
}
@Override
+ public boolean hasFreqs() {
+ return true;
+ }
+
+ @Override
public boolean hasOffsets() {
return storeOffsets;
}
@@ -961,11 +966,6 @@ public class MemoryIndex {
}
@Override
- public Comparator<BytesRef> getComparator() {
- return BytesRef.getUTF8SortedAsUnicodeComparator();
- }
-
- @Override
public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null;
this.seekExact(((OrdTermState)state).ord);
Modified: lucene/dev/branches/lucene4956/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java Mon Oct 21 18:58:24 2013
@@ -176,7 +176,7 @@ public class MemoryIndexTest extends Bas
private void duellReaders(CompositeReader other, AtomicReader memIndexReader)
throws IOException {
- AtomicReader competitor = new SlowCompositeReaderWrapper(other);
+ AtomicReader competitor = SlowCompositeReaderWrapper.wrap(other);
Fields memFields = memIndexReader.fields();
for (String field : competitor.fields()) {
Terms memTerms = memFields.terms(field);
Modified: lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java Mon Oct 21 18:58:24 2013
@@ -139,9 +139,8 @@ public class IndexSplitter {
SegmentInfo info = infoPerCommit.info;
// Same info just changing the dir:
SegmentInfo newInfo = new SegmentInfo(destFSDir, info.getVersion(), info.name, info.getDocCount(),
- info.getUseCompoundFile(),
- info.getCodec(), info.getDiagnostics(), info.attributes());
- destInfos.add(new SegmentInfoPerCommit(newInfo, infoPerCommit.getDelCount(), infoPerCommit.getDelGen()));
+ info.getUseCompoundFile(), info.getCodec(), info.getDiagnostics());
+ destInfos.add(new SegmentInfoPerCommit(newInfo, infoPerCommit.getDelCount(), infoPerCommit.getDelGen(), infoPerCommit.getFieldInfosGen()));
// now copy files over
Collection<String> files = infoPerCommit.files();
for (final String srcName : files) {
Modified: lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java (original)
+++ lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/index/sorter/SortingAtomicReader.java Mon Oct 21 18:58:24 2013
@@ -221,6 +221,27 @@ public class SortingAtomicReader extends
}
}
+ private static class SortingBits implements Bits {
+
+ private final Bits in;
+ private final Sorter.DocMap docMap;
+
+ public SortingBits(final Bits in, Sorter.DocMap docMap) {
+ this.in = in;
+ this.docMap = docMap;
+ }
+
+ @Override
+ public boolean get(int index) {
+ return in.get(docMap.newToOld(index));
+ }
+
+ @Override
+ public int length() {
+ return in.length();
+ }
+ }
+
private static class SortingSortedDocValues extends SortedDocValues {
private final SortedDocValues in;
@@ -743,20 +764,9 @@ public class SortingAtomicReader extends
final Bits inLiveDocs = in.getLiveDocs();
if (inLiveDocs == null) {
return null;
+ } else {
+ return new SortingBits(inLiveDocs, docMap);
}
- return new Bits() {
-
- @Override
- public boolean get(int index) {
- return inLiveDocs.get(docMap.newToOld(index));
- }
-
- @Override
- public int length() {
- return inLiveDocs.length();
- }
-
- };
}
@Override
@@ -797,6 +807,16 @@ public class SortingAtomicReader extends
}
@Override
+ public Bits getDocsWithField(String field) throws IOException {
+ Bits bits = in.getDocsWithField(field);
+ if (bits == null || bits instanceof Bits.MatchAllBits || bits instanceof Bits.MatchNoBits) {
+ return bits;
+ } else {
+ return new SortingBits(bits, docMap);
+ }
+ }
+
+ @Override
public Fields getTermVectors(final int docID) throws IOException {
return in.getTermVectors(docMap.newToOld(docID));
}
Modified: lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/misc/GetTermInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/misc/GetTermInfo.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/misc/GetTermInfo.java (original)
+++ lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/misc/GetTermInfo.java Mon Oct 21 18:58:24 2013
@@ -20,7 +20,6 @@ package org.apache.lucene.misc;
import java.io.File;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
@@ -50,9 +49,8 @@ public class GetTermInfo {
public static void getTermInfo(Directory dir, Term term) throws Exception {
IndexReader reader = DirectoryReader.open(dir);
- long totalTF = HighFreqTerms.getTotalTermFreq(reader, term);
System.out.printf("%s:%s \t totalTF = %,d \t doc freq = %,d \n",
- term.field(), term.text(), totalTF, reader.docFreq(term));
+ term.field(), term.text(), reader.totalTermFreq(term), reader.docFreq(term));
}
private static void usage() {
Modified: lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java (original)
+++ lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java Mon Oct 21 18:58:24 2013
@@ -17,26 +17,19 @@ package org.apache.lucene.misc;
* limitations under the License.
*/
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.ReaderUtil;
-import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.Bits;
import java.io.File;
import java.io.IOException;
-import java.util.Arrays;
import java.util.Comparator;
/**
@@ -51,27 +44,24 @@ import java.util.Comparator;
public class HighFreqTerms {
// The top numTerms will be displayed
- public static final int DEFAULTnumTerms = 100;
- public static int numTerms = DEFAULTnumTerms;
+ public static final int DEFAULT_NUMTERMS = 100;
public static void main(String[] args) throws Exception {
- IndexReader reader = null;
- FSDirectory dir = null;
String field = null;
- boolean IncludeTermFreqs = false;
+ int numTerms = DEFAULT_NUMTERMS;
if (args.length == 0 || args.length > 4) {
usage();
System.exit(1);
}
- if (args.length > 0) {
- dir = FSDirectory.open(new File(args[0]));
- }
+ Directory dir = FSDirectory.open(new File(args[0]));
+
+ Comparator<TermStats> comparator = new DocFreqComparator();
for (int i = 1; i < args.length; i++) {
if (args[i].equals("-t")) {
- IncludeTermFreqs = true;
+ comparator = new TotalTermFreqComparator();
}
else{
try {
@@ -82,22 +72,12 @@ public class HighFreqTerms {
}
}
- reader = DirectoryReader.open(dir);
- TermStats[] terms = getHighFreqTerms(reader, numTerms, field);
- if (!IncludeTermFreqs) {
- //default HighFreqTerms behavior
- for (int i = 0; i < terms.length; i++) {
- System.out.printf("%s:%s %,d \n",
- terms[i].field, terms[i].termtext.utf8ToString(), terms[i].docFreq);
- }
- }
- else{
- TermStats[] termsWithTF = sortByTotalTermFreq(reader, terms);
- for (int i = 0; i < termsWithTF.length; i++) {
- System.out.printf("%s:%s \t totalTF = %,d \t doc freq = %,d \n",
- termsWithTF[i].field, termsWithTF[i].termtext.utf8ToString(),
- termsWithTF[i].totalTermFreq, termsWithTF[i].docFreq);
- }
+ IndexReader reader = DirectoryReader.open(dir);
+ TermStats[] terms = getHighFreqTerms(reader, numTerms, field, comparator);
+
+ for (int i = 0; i < terms.length; i++) {
+ System.out.printf("%s:%s \t totalTF = %,d \t docFreq = %,d \n",
+ terms[i].field, terms[i].termtext.utf8ToString(), terms[i].totalTermFreq, terms[i].docFreq);
}
reader.close();
}
@@ -105,12 +85,13 @@ public class HighFreqTerms {
private static void usage() {
System.out
.println("\n\n"
- + "java org.apache.lucene.misc.HighFreqTerms <index dir> [-t] [number_terms] [field]\n\t -t: include totalTermFreq\n\n");
+ + "java org.apache.lucene.misc.HighFreqTerms <index dir> [-t] [number_terms] [field]\n\t -t: order by totalTermFreq\n\n");
}
+
/**
- * Returns TermStats[] ordered by terms with highest docFreq first.
+ * Returns TermStats[] ordered by the specified comparator
*/
- public static TermStats[] getHighFreqTerms(IndexReader reader, int numTerms, String field) throws Exception {
+ public static TermStats[] getHighFreqTerms(IndexReader reader, int numTerms, String field, Comparator<TermStats> comparator) throws Exception {
TermStatsQueue tiq = null;
if (field != null) {
@@ -121,7 +102,7 @@ public class HighFreqTerms {
Terms terms = fields.terms(field);
if (terms != null) {
TermsEnum termsEnum = terms.iterator(null);
- tiq = new TermStatsQueue(numTerms);
+ tiq = new TermStatsQueue(numTerms, comparator);
tiq.fill(field, termsEnum);
}
} else {
@@ -129,7 +110,7 @@ public class HighFreqTerms {
if (fields == null) {
throw new RuntimeException("no fields found for this index");
}
- tiq = new TermStatsQueue(numTerms);
+ tiq = new TermStatsQueue(numTerms, comparator);
for (String fieldName : fields) {
Terms terms = fields.terms(fieldName);
if (terms != null) {
@@ -150,91 +131,61 @@ public class HighFreqTerms {
}
/**
- * Takes array of TermStats. For each term looks up the tf for each doc
- * containing the term and stores the total in the output array of TermStats.
- * Output array is sorted by highest total tf.
- *
- * @param terms
- * TermStats[]
- * @return TermStats[]
+ * Compares terms by docFreq
*/
-
- public static TermStats[] sortByTotalTermFreq(IndexReader reader, TermStats[] terms) throws Exception {
- TermStats[] ts = new TermStats[terms.length]; // array for sorting
- long totalTF;
- for (int i = 0; i < terms.length; i++) {
- totalTF = getTotalTermFreq(reader, new Term(terms[i].field, terms[i].termtext));
- ts[i] = new TermStats(terms[i].field, terms[i].termtext, terms[i].docFreq, totalTF);
- }
-
- Comparator<TermStats> c = new TotalTermFreqComparatorSortDescending();
- Arrays.sort(ts, c);
+ public static final class DocFreqComparator implements Comparator<TermStats> {
- return ts;
- }
-
- public static long getTotalTermFreq(IndexReader reader, Term term) throws Exception {
- long totalTF = 0L;
- for (final AtomicReaderContext ctx : reader.leaves()) {
- AtomicReader r = ctx.reader();
- if (!r.hasDeletions()) {
- // TODO: we could do this up front, during the scan
- // (next()), instead of after-the-fact here w/ seek,
- // if the codec supports it and there are no del
- // docs...
- final long totTF = r.totalTermFreq(term);
- if (totTF != -1) {
- totalTF += totTF;
- continue;
- } // otherwise we fall-through
- }
- // note: what should we do if field omits freqs? currently it counts as 1...
- DocsEnum de = r.termDocsEnum(term);
- if (de != null) {
- while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
- totalTF += de.freq();
+ @Override
+ public int compare(TermStats a, TermStats b) {
+ int res = Long.compare(a.docFreq, b.docFreq);
+ if (res == 0) {
+ res = a.field.compareTo(b.field);
+ if (res == 0) {
+ res = a.termtext.compareTo(b.termtext);
+ }
}
+ return res;
}
-
- return totalTF;
}
- }
-/**
- * Comparator
- *
- * Reverse of normal Comparator. i.e. returns 1 if a.totalTermFreq is less than
- * b.totalTermFreq So we can sort in descending order of totalTermFreq
- */
-
-final class TotalTermFreqComparatorSortDescending implements Comparator<TermStats> {
-
- @Override
- public int compare(TermStats a, TermStats b) {
- return Long.compare(b.totalTermFreq, a.totalTermFreq);
+ /**
+ * Compares terms by totalTermFreq
+ */
+ public static final class TotalTermFreqComparator implements Comparator<TermStats> {
+
+ @Override
+ public int compare(TermStats a, TermStats b) {
+ int res = Long.compare(a.totalTermFreq, b.totalTermFreq);
+ if (res == 0) {
+ res = a.field.compareTo(b.field);
+ if (res == 0) {
+ res = a.termtext.compareTo(b.termtext);
+ }
+ }
+ return res;
+ }
}
-}
-
-/**
- * Priority queue for TermStats objects ordered by docFreq
- **/
-final class TermStatsQueue extends PriorityQueue<TermStats> {
- TermStatsQueue(int size) {
- super(size);
- }
-
- @Override
- protected boolean lessThan(TermStats termInfoA, TermStats termInfoB) {
- return termInfoA.docFreq < termInfoB.docFreq;
- }
-
- protected void fill(String field, TermsEnum termsEnum) throws IOException {
- while (true) {
- BytesRef term = termsEnum.next();
- if (term != null) {
- insertWithOverflow(new TermStats(field, term, termsEnum.docFreq()));
- } else {
- break;
+
+ /**
+ * Priority queue for TermStats objects
+ **/
+ static final class TermStatsQueue extends PriorityQueue<TermStats> {
+ final Comparator<TermStats> comparator;
+
+ TermStatsQueue(int size, Comparator<TermStats> comparator) {
+ super(size);
+ this.comparator = comparator;
+ }
+
+ @Override
+ protected boolean lessThan(TermStats termInfoA, TermStats termInfoB) {
+ return comparator.compare(termInfoA, termInfoB) < 0;
+ }
+
+ protected void fill(String field, TermsEnum termsEnum) throws IOException {
+ BytesRef term = null;
+ while ((term = termsEnum.next()) != null) {
+ insertWithOverflow(new TermStats(field, term, termsEnum.docFreq(), termsEnum.totalTermFreq()));
}
}
}
Modified: lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/misc/TermStats.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/misc/TermStats.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/misc/TermStats.java (original)
+++ lucene/dev/branches/lucene4956/lucene/misc/src/java/org/apache/lucene/misc/TermStats.java Mon Oct 21 18:58:24 2013
@@ -29,12 +29,6 @@ public final class TermStats {
public int docFreq;
public long totalTermFreq;
- TermStats(String field, BytesRef termtext, int df) {
- this.termtext = BytesRef.deepCopyOf(termtext);
- this.field = field;
- this.docFreq = df;
- }
-
TermStats(String field, BytesRef termtext, int df, long tf) {
this.termtext = BytesRef.deepCopyOf(termtext);
this.field = field;
Modified: lucene/dev/branches/lucene4956/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java (original)
+++ lucene/dev/branches/lucene4956/lucene/misc/src/test/org/apache/lucene/index/sorter/SorterTestBase.java Mon Oct 21 18:58:24 2013
@@ -114,7 +114,6 @@ public abstract class SorterTestBase ext
public PositionsTokenStream() {
term = addAttribute(CharTermAttribute.class);
- term.append(DOC_POSITIONS_TERM);
payload = addAttribute(PayloadAttribute.class);
offset = addAttribute(OffsetAttribute.class);
}
@@ -125,6 +124,8 @@ public abstract class SorterTestBase ext
return false;
}
+ clearAttributes();
+ term.append(DOC_POSITIONS_TERM);
payload.setPayload(new BytesRef(Integer.toString(pos)));
offset.setOffset(off, off);
--pos;
Modified: lucene/dev/branches/lucene4956/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java (original)
+++ lucene/dev/branches/lucene4956/lucene/misc/src/test/org/apache/lucene/index/sorter/TestSortingMergePolicy.java Mon Oct 21 18:58:24 2013
@@ -37,7 +37,6 @@ import org.apache.lucene.index.LogMergeP
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TieredMergePolicy;
@@ -121,9 +120,23 @@ public class TestSortingMergePolicy exte
iw1.commit();
iw2.commit();
final Document doc = randomDocument();
- iw1.addDocument(doc);
- iw2.addDocument(doc);
-
+ // NOTE: don't use RIW.addDocument directly, since it sometimes commits
+ // which may trigger a merge, in which case forceMerge may not do anything.
+ // With field updates this is a problem, since the updates can go into the
+ // single segment in the index, and therefore the index won't be sorted.
+ // This hurts the assumption of the test later on, that the index is sorted
+ // by SortingMP.
+ iw1.w.addDocument(doc);
+ iw2.w.addDocument(doc);
+
+ if (defaultCodecSupportsFieldUpdates()) {
+ // update NDV of docs belonging to one term (covers many documents)
+ final long value = random().nextLong();
+ final String term = RandomPicks.randomFrom(random(), terms);
+ iw1.w.updateNumericDocValue(new Term("s", term), "ndv", value);
+ iw2.w.updateNumericDocValue(new Term("s", term), "ndv", value);
+ }
+
iw1.forceMerge(1);
iw2.forceMerge(1);
iw1.close();
@@ -144,7 +157,7 @@ public class TestSortingMergePolicy exte
private static void assertSorted(AtomicReader reader) throws IOException {
final NumericDocValues ndv = reader.getNumericDocValues("ndv");
for (int i = 1; i < reader.maxDoc(); ++i) {
- assertTrue(ndv.get(i-1) <= ndv.get(i));
+ assertTrue("ndv(" + (i-1) + ")=" + ndv.get(i-1) + ",ndv(" + i + ")=" + ndv.get(i), ndv.get(i-1) <= ndv.get(i));
}
}
@@ -154,6 +167,7 @@ public class TestSortingMergePolicy exte
assertSorted(sortedReader1);
assertSorted(sortedReader2);
+
assertReaderEquals("", sortedReader1, sortedReader2);
}
Modified: lucene/dev/branches/lucene4956/lucene/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java (original)
+++ lucene/dev/branches/lucene4956/lucene/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java Mon Oct 21 18:58:24 2013
@@ -26,9 +26,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.junit.AfterClass;
@@ -66,21 +64,21 @@ public class TestHighFreqTerms extends L
public void testFirstTermHighestDocFreqAllFields () throws Exception{
int numTerms = 12;
String field =null;
- TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field);
+ TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field, new HighFreqTerms.DocFreqComparator());
assertEquals("Term with highest docfreq is first", 20,terms[0].docFreq );
}
public void testFirstTermHighestDocFreq () throws Exception{
int numTerms = 12;
String field="FIELD_1";
- TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field);
+ TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field, new HighFreqTerms.DocFreqComparator());
assertEquals("Term with highest docfreq is first", 10,terms[0].docFreq );
}
public void testOrderedByDocFreqDescending () throws Exception{
int numTerms = 12;
String field="FIELD_1";
- TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field);
+ TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field, new HighFreqTerms.DocFreqComparator());
for (int i = 0; i < terms.length; i++) {
if (i > 0) {
assertTrue ("out of order " + terms[i-1].docFreq + "should be >= " + terms[i].docFreq,terms[i-1].docFreq >= terms[i].docFreq);
@@ -91,14 +89,14 @@ public class TestHighFreqTerms extends L
public void testNumTerms () throws Exception{
int numTerms = 12;
String field = null;
- TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field);
+ TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field, new HighFreqTerms.DocFreqComparator());
assertEquals("length of terms array equals numTerms :" + numTerms, numTerms, terms.length);
}
public void testGetHighFreqTerms () throws Exception{
int numTerms=12;
String field="FIELD_1";
- TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field);
+ TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field, new HighFreqTerms.DocFreqComparator());
for (int i = 0; i < terms.length; i++) {
String termtext = terms[i].termtext.utf8ToString();
@@ -122,30 +120,27 @@ public class TestHighFreqTerms extends L
public void testFirstTermHighestTotalTermFreq () throws Exception{
int numTerms = 20;
String field = null;
- TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field);
- TermStats[] termsWithTotalTermFreq = HighFreqTerms.sortByTotalTermFreq(reader, terms);
- assertEquals("Term with highest totalTermFreq is first",200, termsWithTotalTermFreq[0].totalTermFreq);
+ TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field, new HighFreqTerms.TotalTermFreqComparator());
+ assertEquals("Term with highest totalTermFreq is first",200, terms[0].totalTermFreq);
}
public void testFirstTermHighestTotalTermFreqDifferentField () throws Exception{
int numTerms = 20;
String field = "different_field";
- TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field);
- TermStats[] termsWithTotalTermFreq = HighFreqTerms.sortByTotalTermFreq(reader, terms);
- assertEquals("Term with highest totalTermFreq is first"+ termsWithTotalTermFreq[0].getTermText(),150, termsWithTotalTermFreq[0].totalTermFreq);
+ TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field, new HighFreqTerms.TotalTermFreqComparator());
+ assertEquals("Term with highest totalTermFreq is first"+ terms[0].getTermText(),150, terms[0].totalTermFreq);
}
public void testOrderedByTermFreqDescending () throws Exception{
int numTerms = 12;
String field = "FIELD_1";
- TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field);
- TermStats[] termsWithTF = HighFreqTerms.sortByTotalTermFreq(reader, terms);
+ TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field, new HighFreqTerms.TotalTermFreqComparator());
- for (int i = 0; i < termsWithTF.length; i++) {
+ for (int i = 0; i < terms.length; i++) {
// check that they are sorted by descending termfreq
// order
if (i > 0) {
- assertTrue ("out of order" +termsWithTF[i-1]+ " > " +termsWithTF[i],termsWithTF[i-1].totalTermFreq >= termsWithTF[i].totalTermFreq);
+ assertTrue ("out of order" +terms[i-1]+ " > " +terms[i],terms[i-1].totalTermFreq >= terms[i].totalTermFreq);
}
}
}
@@ -153,49 +148,29 @@ public class TestHighFreqTerms extends L
public void testGetTermFreqOrdered () throws Exception{
int numTerms = 12;
String field = "FIELD_1";
- TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field);
- TermStats[] termsWithTF = HighFreqTerms.sortByTotalTermFreq(reader, terms);
+ TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field, new HighFreqTerms.TotalTermFreqComparator());
- for (int i = 0; i < termsWithTF.length; i++) {
- String text = termsWithTF[i].termtext.utf8ToString();
+ for (int i = 0; i < terms.length; i++) {
+ String text = terms[i].termtext.utf8ToString();
if (text.contains("highTF")) {
if (text.contains("medDF")) {
assertEquals("total term freq is expected", 125,
- termsWithTF[i].totalTermFreq);
+ terms[i].totalTermFreq);
} else {
assertEquals("total term freq is expected", 200,
- termsWithTF[i].totalTermFreq);
+ terms[i].totalTermFreq);
}
} else {
int n = Integer.parseInt(text);
assertEquals("doc freq is expected", getExpecteddocFreq(n),
- termsWithTF[i].docFreq);
+ terms[i].docFreq);
assertEquals("total term freq is expected", getExpectedtotalTermFreq(n),
- termsWithTF[i].totalTermFreq);
+ terms[i].totalTermFreq);
}
}
}
-
- /********************Tests for getTotalTermFreq**********************************/
-
- public void testGetTotalTermFreq() throws Exception{
- String term ="highTF";
- BytesRef termtext = new BytesRef (term);
- String field = "FIELD_1";
- long totalTermFreq = HighFreqTerms.getTotalTermFreq(reader, new Term(field, termtext));
- assertEquals("highTf tf should be 200",200,totalTermFreq);
-
- }
-
- public void testGetTotalTermFreqBadTerm() throws Exception{
- String term ="foobar";
- BytesRef termtext = new BytesRef (term);
- String field = "FIELD_1";
- long totalTermFreq = HighFreqTerms.getTotalTermFreq(reader, new Term(field, termtext));
- assertEquals("totalTermFreq should be 0 for term not in index",0,totalTermFreq);
-
- }
+
/********************Testing Utils**********************************/
private static void indexDocs(IndexWriter writer) throws Exception {
Modified: lucene/dev/branches/lucene4956/lucene/module-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/module-build.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/module-build.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/module-build.xml Mon Oct 21 18:58:24 2013
@@ -31,7 +31,6 @@
<!-- if you extend the classpath refid in one contrib's build.xml (add JARs), use this as basis: -->
<path id="base.classpath">
<pathelement location="${common.dir}/build/core/classes/java"/>
- <pathelement path="${project.classpath}"/>
</path>
<!-- default classpath refid, can be overridden by contrib's build.xml (use the above base.classpath as basis): -->
@@ -455,6 +454,28 @@
<property name="codecs-javadocs.uptodate" value="true"/>
</target>
+ <property name="expressions.jar" value="${common.dir}/build/expressions/lucene-expressions-${version}.jar"/>
+ <target name="check-expressions-uptodate" unless="expressions.uptodate">
+ <module-uptodate name="expressions" jarfile="${expressions.jar}" property="expressions.uptodate"/>
+ </target>
+ <target name="jar-expressions" unless="expressions.uptodate" depends="check-expressions-uptodate">
+ <ant dir="${common.dir}/expressions" target="jar-core" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="expressions.uptodate" value="true"/>
+ </target>
+
+ <property name="expressions-javadoc.jar" value="${common.dir}/build/expressions/lucene-expressions-${version}-javadoc.jar"/>
+ <target name="check-expressions-javadocs-uptodate" unless="expressions-javadocs.uptodate">
+ <module-uptodate name="expressions" jarfile="${expressions-javadoc.jar}" property="expressions-javadocs.uptodate"/>
+ </target>
+ <target name="javadocs-expressions" unless="expressions-javadocs.uptodate" depends="check-expressions-javadocs-uptodate">
+ <ant dir="${common.dir}/expressions" target="javadocs" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="expressions-javadocs.uptodate" value="true"/>
+ </target>
+
<property name="grouping.jar" value="${common.dir}/build/grouping/lucene-grouping-${version}.jar"/>
<target name="check-grouping-uptodate" unless="grouping.uptodate">
<module-uptodate name="grouping" jarfile="${grouping.jar}" property="grouping.uptodate"/>
Modified: lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java Mon Oct 21 18:58:24 2013
@@ -38,16 +38,16 @@ import org.apache.lucene.util.ToStringUt
/**
* A query that executes high-frequency terms in a optional sub-query to prevent
- * slow queries due to "common" terms like stopwords. This query basically
- * builds 2 queries off the {@link #add(Term) added} terms where low-frequency
+ * slow queries due to "common" terms like stopwords. This query
+ * builds 2 queries off the {@link #add(Term) added} terms: low-frequency
* terms are added to a required boolean clause and high-frequency terms are
* added to an optional boolean clause. The optional clause is only executed if
- * the required "low-frequency' clause matches. Scores produced by this query
- * will be slightly different to plain {@link BooleanQuery} scorer mainly due to
- * differences in the {@link Similarity#coord(int,int) number of leave queries}
- * in the required boolean clause. In the most cases high-frequency terms are
+ * the required "low-frequency" clause matches. Scores produced by this query
+ * will be slightly different from those of a plain {@link BooleanQuery} scorer, mainly due to
+ * differences in the {@link Similarity#coord(int,int) number of leaf queries}
+ * in the required boolean clause. In most cases, high-frequency terms are
* unlikely to significantly contribute to the document score unless at least
- * one of the low-frequency terms are matched such that this query can improve
+ * one of the low-frequency terms is matched. This query can improve
* query execution times significantly if applicable.
* <p>
* {@link CommonTermsQuery} has several advantages over stopword filtering at
@@ -173,7 +173,7 @@ public class CommonTermsQuery extends Qu
if (minNrShouldMatch >= 1.0f || minNrShouldMatch == 0.0f) {
return (int) minNrShouldMatch;
}
- return (int) (Math.round(minNrShouldMatch * numOptional));
+ return Math.round(minNrShouldMatch * numOptional);
}
protected Query buildQuery(final int maxDoc,
@@ -214,18 +214,13 @@ public class CommonTermsQuery extends Qu
* if lowFreq is empty we rewrite the high freq terms in a conjunction to
* prevent slow queries.
*/
- if (highFreqOccur == Occur.MUST) {
- highFreq.setBoost(getBoost());
- return highFreq;
- } else {
- BooleanQuery highFreqConjunction = new BooleanQuery();
+ if (highFreq.getMinimumNumberShouldMatch() == 0 && highFreqOccur != Occur.MUST) {
for (BooleanClause booleanClause : highFreq) {
- highFreqConjunction.add(booleanClause.getQuery(), Occur.MUST);
+ booleanClause.setOccur(Occur.MUST);
}
- highFreqConjunction.setBoost(getBoost());
- return highFreqConjunction;
-
}
+ highFreq.setBoost(getBoost());
+ return highFreq;
} else if (highFreq.clauses().isEmpty()) {
// only do low freq terms - we don't have high freq terms
lowFreq.setBoost(getBoost());
Modified: lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java Mon Oct 21 18:58:24 2013
@@ -148,8 +148,16 @@ public abstract class DocTermsIndexDocVa
@Override
public void fillValue(int doc) {
- termsIndex.get(doc, mval.value);
- mval.exists = mval.value.bytes != SortedDocValues.MISSING;
+ int ord = termsIndex.getOrd(doc);
+ if (ord == -1) {
+ mval.value.bytes = BytesRef.EMPTY_BYTES;
+ mval.value.offset = 0;
+ mval.value.length = 0;
+ mval.exists = false;
+ } else {
+ termsIndex.lookupOrd(ord, mval.value);
+ mval.exists = true;
+ }
}
};
}
Modified: lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DoubleDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DoubleDocValues.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DoubleDocValues.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DoubleDocValues.java Mon Oct 21 18:58:24 2013
@@ -17,8 +17,10 @@ package org.apache.lucene.queries.functi
* limitations under the License.
*/
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.queries.function.ValueSourceScorer;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueDouble;
@@ -80,6 +82,64 @@ public abstract class DoubleDocValues ex
public String toString(int doc) {
return vs.description() + '=' + strVal(doc);
}
+
+ @Override
+ public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
+ double lower,upper;
+
+ if (lowerVal==null) {
+ lower = Double.NEGATIVE_INFINITY;
+ } else {
+ lower = Double.parseDouble(lowerVal);
+ }
+
+ if (upperVal==null) {
+ upper = Double.POSITIVE_INFINITY;
+ } else {
+ upper = Double.parseDouble(upperVal);
+ }
+
+ final double l = lower;
+ final double u = upper;
+
+
+ if (includeLower && includeUpper) {
+ return new ValueSourceScorer(reader, this) {
+ @Override
+ public boolean matchesValue(int doc) {
+ double docVal = doubleVal(doc);
+ return docVal >= l && docVal <= u;
+ }
+ };
+ }
+ else if (includeLower && !includeUpper) {
+ return new ValueSourceScorer(reader, this) {
+ @Override
+ public boolean matchesValue(int doc) {
+ double docVal = doubleVal(doc);
+ return docVal >= l && docVal < u;
+ }
+ };
+ }
+ else if (!includeLower && includeUpper) {
+ return new ValueSourceScorer(reader, this) {
+ @Override
+ public boolean matchesValue(int doc) {
+ double docVal = doubleVal(doc);
+ return docVal > l && docVal <= u;
+ }
+ };
+ }
+ else {
+ return new ValueSourceScorer(reader, this) {
+ @Override
+ public boolean matchesValue(int doc) {
+ double docVal = doubleVal(doc);
+ return docVal > l && docVal < u;
+ }
+ };
+ }
+ }
@Override
public ValueFiller getValueFiller() {
Modified: lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/IntDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/IntDocValues.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/IntDocValues.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/IntDocValues.java Mon Oct 21 18:58:24 2013
@@ -17,8 +17,10 @@ package org.apache.lucene.queries.functi
* limitations under the License.
*/
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.queries.function.ValueSourceScorer;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueInt;
@@ -75,6 +77,40 @@ public abstract class IntDocValues exten
public String toString(int doc) {
return vs.description() + '=' + strVal(doc);
}
+
+ @Override
+ public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
+ int lower,upper;
+
+ // instead of using separate comparison functions, adjust the endpoints.
+
+ if (lowerVal==null) {
+ lower = Integer.MIN_VALUE;
+ } else {
+ lower = Integer.parseInt(lowerVal);
+ if (!includeLower && lower < Integer.MAX_VALUE) lower++;
+ }
+
+ if (upperVal==null) {
+ upper = Integer.MAX_VALUE;
+ } else {
+ upper = Integer.parseInt(upperVal);
+ if (!includeUpper && upper > Integer.MIN_VALUE) upper--;
+ }
+
+ final int ll = lower;
+ final int uu = upper;
+
+ return new ValueSourceScorer(reader, this) {
+ @Override
+ public boolean matchesValue(int doc) {
+ int val = intVal(doc);
+ // only check for deleted if it's the default value
+ // if (val==0 && reader.isDeleted(doc)) return false;
+ return val >= ll && val <= uu;
+ }
+ };
+ }
@Override
public ValueFiller getValueFiller() {
Modified: lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/LongDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/LongDocValues.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/LongDocValues.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/LongDocValues.java Mon Oct 21 18:58:24 2013
@@ -17,8 +17,10 @@ package org.apache.lucene.queries.functi
* limitations under the License.
*/
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.queries.function.ValueSourceScorer;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueLong;
@@ -80,6 +82,44 @@ public abstract class LongDocValues exte
public String toString(int doc) {
return vs.description() + '=' + strVal(doc);
}
+
+ protected long externalToLong(String extVal) {
+ return Long.parseLong(extVal);
+ }
+
+ @Override
+ public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
+ long lower,upper;
+
+ // instead of using separate comparison functions, adjust the endpoints.
+
+ if (lowerVal==null) {
+ lower = Long.MIN_VALUE;
+ } else {
+ lower = externalToLong(lowerVal);
+ if (!includeLower && lower < Long.MAX_VALUE) lower++;
+ }
+
+ if (upperVal==null) {
+ upper = Long.MAX_VALUE;
+ } else {
+ upper = externalToLong(upperVal);
+ if (!includeUpper && upper > Long.MIN_VALUE) upper--;
+ }
+
+ final long ll = lower;
+ final long uu = upper;
+
+ return new ValueSourceScorer(reader, this) {
+ @Override
+ public boolean matchesValue(int doc) {
+ long val = longVal(doc);
+ // only check for deleted if it's the default value
+ // if (val==0 && reader.isDeleted(doc)) return false;
+ return val >= ll && val <= uu;
+ }
+ };
+ }
@Override
public ValueFiller getValueFiller() {
Modified: lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java Mon Oct 21 18:58:24 2013
@@ -45,12 +45,13 @@ public class BytesRefFieldSource extends
// To be sorted or not to be sorted, that is the question
// TODO: do it cleaner?
if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.BINARY) {
- final BinaryDocValues binaryValues = FieldCache.DEFAULT.getTerms(readerContext.reader(), field);
+ final BinaryDocValues binaryValues = FieldCache.DEFAULT.getTerms(readerContext.reader(), field, true);
+ final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(readerContext.reader(), field);
return new FunctionValues() {
@Override
public boolean exists(int doc) {
- return true; // doc values are dense
+ return docsWithField.get(doc);
}
@Override
Modified: lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DoubleFieldSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DoubleFieldSource.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DoubleFieldSource.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DoubleFieldSource.java Mon Oct 21 18:58:24 2013
@@ -31,13 +31,9 @@ import org.apache.lucene.util.mutable.Mu
import org.apache.lucene.util.mutable.MutableValueDouble;
/**
- * Obtains float field values from the {@link org.apache.lucene.search.FieldCache}
- * using <code>getFloats()</code>
- * and makes those values available as other numeric types, casting as needed.
- *
- *
+ * Obtains double field values from {@link FieldCache#getDoubles} and makes
+ * those values available as other numeric types, casting as needed.
*/
-
public class DoubleFieldSource extends FieldCacheSource {
protected final FieldCache.DoubleParser parser;
@@ -68,65 +64,7 @@ public class DoubleFieldSource extends F
@Override
public boolean exists(int doc) {
- return valid.get(doc);
- }
-
- @Override
- public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
- double lower,upper;
-
- if (lowerVal==null) {
- lower = Double.NEGATIVE_INFINITY;
- } else {
- lower = Double.parseDouble(lowerVal);
- }
-
- if (upperVal==null) {
- upper = Double.POSITIVE_INFINITY;
- } else {
- upper = Double.parseDouble(upperVal);
- }
-
- final double l = lower;
- final double u = upper;
-
-
- if (includeLower && includeUpper) {
- return new ValueSourceScorer(reader, this) {
- @Override
- public boolean matchesValue(int doc) {
- double docVal = doubleVal(doc);
- return docVal >= l && docVal <= u;
- }
- };
- }
- else if (includeLower && !includeUpper) {
- return new ValueSourceScorer(reader, this) {
- @Override
- public boolean matchesValue(int doc) {
- double docVal = doubleVal(doc);
- return docVal >= l && docVal < u;
- }
- };
- }
- else if (!includeLower && includeUpper) {
- return new ValueSourceScorer(reader, this) {
- @Override
- public boolean matchesValue(int doc) {
- double docVal = doubleVal(doc);
- return docVal > l && docVal <= u;
- }
- };
- }
- else {
- return new ValueSourceScorer(reader, this) {
- @Override
- public boolean matchesValue(int doc) {
- double docVal = doubleVal(doc);
- return docVal > l && docVal < u;
- }
- };
- }
+ return arr.get(doc) != 0 || valid.get(doc);
}
@Override
@@ -142,7 +80,7 @@ public class DoubleFieldSource extends F
@Override
public void fillValue(int doc) {
mval.value = arr.get(doc);
- mval.exists = valid.get(doc);
+ mval.exists = mval.value != 0 || valid.get(doc);
}
};
}
Modified: lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/FloatFieldSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/FloatFieldSource.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/FloatFieldSource.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/FloatFieldSource.java Mon Oct 21 18:58:24 2013
@@ -29,13 +29,9 @@ import org.apache.lucene.util.mutable.Mu
import org.apache.lucene.util.mutable.MutableValueFloat;
/**
- * Obtains float field values from the {@link org.apache.lucene.search.FieldCache}
- * using <code>getFloats()</code>
- * and makes those values available as other numeric types, casting as needed.
- *
- *
+ * Obtains float field values from {@link FieldCache#getFloats} and makes those
+ * values available as other numeric types, casting as needed.
*/
-
public class FloatFieldSource extends FieldCacheSource {
protected final FieldCache.FloatParser parser;
@@ -72,7 +68,7 @@ public class FloatFieldSource extends Fi
@Override
public boolean exists(int doc) {
- return valid.get(doc);
+ return arr.get(doc) != 0 || valid.get(doc);
}
@Override
@@ -88,7 +84,7 @@ public class FloatFieldSource extends Fi
@Override
public void fillValue(int doc) {
mval.value = arr.get(doc);
- mval.exists = valid.get(doc);
+ mval.exists = mval.value != 0 || valid.get(doc);
}
};
}