Posted to commits@lucene.apache.org by sa...@apache.org on 2013/05/22 17:51:12 UTC
svn commit: r1485259 [3/5] - in /lucene/dev/branches/lucene4956: ./
dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/
dev-tools/idea/lucene/replicator/ dev-tools/maven/ dev-tools/maven/lucene/
dev-tools/maven/lucene/replicator/ dev-tools...
Modified: lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/index/TestPersistentSnapshotDeletionPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/index/TestPersistentSnapshotDeletionPolicy.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/index/TestPersistentSnapshotDeletionPolicy.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/index/TestPersistentSnapshotDeletionPolicy.java Wed May 22 15:51:08 2013
@@ -49,7 +49,7 @@ public class TestPersistentSnapshotDelet
@Test
public void testExistingSnapshots() throws Exception {
int numSnapshots = 3;
- Directory dir = newDirectory();
+ MockDirectoryWrapper dir = newMockDirectory();
IndexWriter writer = new IndexWriter(dir, getConfig(random(), getDeletionPolicy(dir)));
PersistentSnapshotDeletionPolicy psdp = (PersistentSnapshotDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
assertNull(psdp.getLastSaveFile());
@@ -57,6 +57,19 @@ public class TestPersistentSnapshotDelet
assertNotNull(psdp.getLastSaveFile());
writer.close();
+ // Make sure only 1 save file exists:
+ int count = 0;
+ for(String file : dir.listAll()) {
+ if (file.startsWith(PersistentSnapshotDeletionPolicy.SNAPSHOTS_PREFIX)) {
+ count++;
+ }
+ }
+ assertEquals(1, count);
+
+ // Make sure we fsync:
+ dir.crash();
+ dir.clearCrash();
+
// Re-initialize and verify snapshots were persisted
psdp = new PersistentSnapshotDeletionPolicy(
new KeepOnlyLastCommitDeletionPolicy(), dir, OpenMode.APPEND);
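
For context: the new assertions above lean on MockDirectoryWrapper's crash
simulation, where crash() drops or corrupts writes that were not fsync'ed and
clearCrash() makes the directory usable again. A minimal sketch of that
contract, assuming the 4.x test-framework APIs used in the hunk (the file
name "state" is illustrative):

    import java.util.Arrays;
    import java.util.Collections;
    import org.apache.lucene.store.IOContext;
    import org.apache.lucene.store.IndexOutput;
    import org.apache.lucene.store.MockDirectoryWrapper;
    import org.apache.lucene.util.LuceneTestCase;

    public class CrashContractSketch extends LuceneTestCase {
      public void testSyncedFileSurvivesCrash() throws Exception {
        MockDirectoryWrapper dir = newMockDirectory();
        IndexOutput out = dir.createOutput("state", IOContext.DEFAULT);
        out.writeInt(42);
        out.close();
        dir.sync(Collections.singleton("state")); // without this, crash() may lose the file
        dir.crash();      // simulate an OS/machine crash: un-synced writes are lost
        dir.clearCrash(); // allow further use of the directory
        assertTrue(Arrays.asList(dir.listAll()).contains("state"));
        dir.close();
      }
    }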
Modified: lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/search/TestLiveFieldValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/search/TestLiveFieldValues.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/search/TestLiveFieldValues.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/search/TestLiveFieldValues.java Wed May 22 15:51:08 2013
@@ -58,7 +58,7 @@ public class TestLiveFieldValues extends
final Integer missing = -1;
- final LiveFieldValues<Integer> rt = new LiveFieldValues<Integer>(mgr, missing) {
+ final LiveFieldValues<IndexSearcher,Integer> rt = new LiveFieldValues<IndexSearcher,Integer>(mgr, missing) {
@Override
protected Integer lookupFromSearcher(IndexSearcher s, String id) throws IOException {
TermQuery tq = new TermQuery(new Term("id", id));
Modified: lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java Wed May 22 15:51:08 2013
@@ -38,13 +38,6 @@ import com.carrotsearch.randomizedtestin
*
* @see TestPositionIncrement
*/
-/*
- * Remove ThreadLeaks and run with (Eclipse or command line):
- * -ea -Drt.seed=AFD1E7E84B35D2B1
- * to get leaked thread errors.
- */
-// @ThreadLeaks(linger = 1000, leakedThreadsBelongToSuite = true)
-@Seed("AFD1E7E84B35D2B1")
public class TestPhraseQuery extends LuceneTestCase {
/** threshold for comparing floats */
Modified: lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/store/TestMockDirectoryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/store/TestMockDirectoryWrapper.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/store/TestMockDirectoryWrapper.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/store/TestMockDirectoryWrapper.java Wed May 22 15:51:08 2013
@@ -51,4 +51,42 @@ public class TestMockDirectoryWrapper ex
iw.close();
dir.close();
}
+
+ public void testDiskFull() throws IOException {
+ // test writeBytes
+ MockDirectoryWrapper dir = newMockDirectory();
+ dir.setMaxSizeInBytes(3);
+ final byte[] bytes = new byte[] { 1, 2};
+ IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT);
+ out.writeBytes(bytes, bytes.length); // first write should succeed
+ // flush() to ensure the written bytes are not buffered and counted
+ // against the directory size
+ out.flush();
+ try {
+ out.writeBytes(bytes, bytes.length);
+ fail("should have failed on disk full");
+ } catch (IOException e) {
+ // expected
+ }
+ out.close();
+ dir.close();
+
+ // test copyBytes
+ dir = newMockDirectory();
+ dir.setMaxSizeInBytes(3);
+ out = dir.createOutput("foo", IOContext.DEFAULT);
+ out.copyBytes(new ByteArrayDataInput(bytes), bytes.length); // first copy should succeed
+ // flush() to ensure the written bytes are not buffered and counted
+ // against the directory size
+ out.flush();
+ try {
+ out.copyBytes(new ByteArrayDataInput(bytes), bytes.length);
+ fail("should have failed on disk full");
+ } catch (IOException e) {
+ // expected
+ }
+ out.close();
+ dir.close();
+ }
+
}
Modified: lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/TestMaxFailuresRule.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/TestMaxFailuresRule.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/TestMaxFailuresRule.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/TestMaxFailuresRule.java Wed May 22 15:51:08 2013
@@ -66,13 +66,10 @@ public class TestMaxFailuresRule extends
@Test
public void testMaxFailures() {
- int maxFailures = LuceneTestCase.ignoreAfterMaxFailures.maxFailures;
- int failuresSoFar = LuceneTestCase.ignoreAfterMaxFailures.failuresSoFar;
+ TestRuleIgnoreAfterMaxFailures newRule = new TestRuleIgnoreAfterMaxFailures(2);
+ TestRuleIgnoreAfterMaxFailures prevRule = LuceneTestCase.replaceMaxFailureRule(newRule);
System.clearProperty(SysGlobals.SYSPROP_ITERATIONS());
try {
- LuceneTestCase.ignoreAfterMaxFailures.maxFailures = 2;
- LuceneTestCase.ignoreAfterMaxFailures.failuresSoFar = 0;
-
JUnitCore core = new JUnitCore();
final StringBuilder results = new StringBuilder();
core.addListener(new RunListener() {
@@ -110,8 +107,7 @@ public class TestMaxFailuresRule extends
results.toString().matches("(S*F){2}A+"));
} finally {
- LuceneTestCase.ignoreAfterMaxFailures.maxFailures = maxFailures;
- LuceneTestCase.ignoreAfterMaxFailures.failuresSoFar = failuresSoFar;
+ LuceneTestCase.replaceMaxFailureRule(prevRule);
}
}
}
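
The hunks above replace direct mutation of the shared ignoreAfterMaxFailures
rule's fields with LuceneTestCase.replaceMaxFailureRule(), which installs a
new rule and returns the previously active one so the finally block can put
it back. A minimal sketch of that install-and-restore contract (the holder
class and names here are hypothetical, not the test-framework code):

    import java.util.concurrent.atomic.AtomicReference;

    final class RuleHolder<T> {
      private final AtomicReference<T> current;

      RuleHolder(T initial) {
        current = new AtomicReference<T>(initial);
      }

      // Installs newRule and hands back the rule that was active before,
      // so the caller can restore it in a finally block.
      T replace(T newRule) {
        return current.getAndSet(newRule);
      }
    }

Typical use mirrors the test above: T prev = holder.replace(newRule);
try { ... } finally { holder.replace(prev); }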
Modified: lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java Wed May 22 15:51:08 2013
@@ -25,7 +25,7 @@ public class TestTimSorter extends BaseS
@Override
public Sorter newSorter(Entry[] arr) {
- return new ArrayTimSorter<Entry>(arr, ArrayUtil.<Entry>naturalComparator(), random().nextInt(arr.length));
+ return new ArrayTimSorter<Entry>(arr, ArrayUtil.<Entry>naturalComparator(), _TestUtil.nextInt(random(), 0, arr.length));
}
}
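
For reference: Random.nextInt(n) excludes n itself and throws
IllegalArgumentException when n == 0 (an empty array), while
_TestUtil.nextInt(random(), 0, arr.length) draws from the inclusive range
[0, arr.length]. A minimal equivalent of the inclusive-bounds helper
(a sketch, not the test-framework source):

    import java.util.Random;

    // Returns a value in [start, end], both bounds inclusive; well-defined
    // even when start == end, unlike Random.nextInt(0).
    static int nextInt(Random r, int start, int end) {
      return start + r.nextInt(end - start + 1);
    }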
Modified: lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java Wed May 22 15:51:08 2013
@@ -593,7 +593,7 @@ public class TestFSTs extends LuceneTest
// TODO: can FST be used to index all internal substrings,
// mapping to term?
- // java -cp ../build/codecs/classes/java:../test-framework/lib/randomizedtesting-runner-2.0.9.jar:../build/core/classes/test:../build/core/classes/test-framework:../build/core/classes/java:../build/test-framework/classes/java:../test-framework/lib/junit-4.10.jar org.apache.lucene.util.fst.TestFSTs /xold/tmp/allTerms3.txt out
+ // java -cp ../build/codecs/classes/java:../test-framework/lib/randomizedtesting-runner-2.0.10.jar:../build/core/classes/test:../build/core/classes/test-framework:../build/core/classes/java:../build/test-framework/classes/java:../test-framework/lib/junit-4.10.jar org.apache.lucene.util.fst.TestFSTs /xold/tmp/allTerms3.txt out
public static void main(String[] args) throws IOException {
int prune = 0;
int limit = Integer.MAX_VALUE;
Modified: lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/junitcompat/WithNestedTests.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/junitcompat/WithNestedTests.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/junitcompat/WithNestedTests.java (original)
+++ lucene/dev/branches/lucene4956/lucene/core/src/test/org/apache/lucene/util/junitcompat/WithNestedTests.java Wed May 22 15:51:08 2013
@@ -23,11 +23,13 @@ import java.io.UnsupportedEncodingExcept
import java.util.List;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures;
import org.apache.lucene.util.TestRuleIgnoreTestSuites;
import org.apache.lucene.util.TestRuleMarkFailure;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
+import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.rules.RuleChain;
import org.junit.rules.TestRule;
@@ -66,6 +68,22 @@ public abstract class WithNestedTests {
private ByteArrayOutputStream sysout;
private ByteArrayOutputStream syserr;
+ @ClassRule
+ public static final TestRule classRules = RuleChain.outerRule(new TestRuleAdapter() {
+ private TestRuleIgnoreAfterMaxFailures prevRule;
+
+ protected void before() throws Throwable {
+ TestRuleIgnoreAfterMaxFailures newRule = new TestRuleIgnoreAfterMaxFailures(Integer.MAX_VALUE);
+ prevRule = LuceneTestCase.replaceMaxFailureRule(newRule);
+ }
+
+ protected void afterAlways(List<Throwable> errors) throws Throwable {
+ if (prevRule != null) {
+ LuceneTestCase.replaceMaxFailureRule(prevRule);
+ }
+ }
+ });
+
/**
* Restore properties after test.
*/
@@ -86,7 +104,7 @@ public abstract class WithNestedTests {
})
.around(marker);
}
-
+
@Before
public final void before() {
if (suppressOutputStreams) {
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java Wed May 22 15:51:08 2013
@@ -48,7 +48,7 @@ public class MultiAssociationsFacetsAggr
* Creates a new {@link MultiAssociationsFacetsAggregator} over the given
* aggregators. The mapping is used by
* {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)} to
- * rollup the values of the speicfic category by the corresponding
+ * rollup the values of the specific category by the corresponding
* {@link FacetsAggregator}. However, since each {@link FacetsAggregator}
* handles the associations of a specific type, which could cover multiple
* categories, the aggregation is done on the unique set of aggregators, which
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java Wed May 22 15:51:08 2013
@@ -119,7 +119,7 @@ public class RangeAccumulator extends Fa
}
@Override
- protected boolean requiresDocScores() {
+ public boolean requiresDocScores() {
return false;
}
}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java Wed May 22 15:51:08 2013
@@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
-import java.util.regex.Pattern;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
@@ -90,7 +89,7 @@ public final class DrillDownQuery extend
}
/** Used by DrillSideways */
- DrillDownQuery(FacetIndexingParams fip, Query baseQuery, List<Query> clauses) {
+ DrillDownQuery(FacetIndexingParams fip, Query baseQuery, List<Query> clauses, Map<String,Integer> drillDownDims) {
this.fip = fip;
this.query = new BooleanQuery(true);
if (baseQuery != null) {
@@ -98,21 +97,8 @@ public final class DrillDownQuery extend
}
for(Query clause : clauses) {
query.add(clause, Occur.MUST);
- drillDownDims.put(getDim(clause), drillDownDims.size());
}
- }
-
- String getDim(Query clause) {
- assert clause instanceof ConstantScoreQuery;
- clause = ((ConstantScoreQuery) clause).getQuery();
- assert clause instanceof TermQuery || clause instanceof BooleanQuery;
- String term;
- if (clause instanceof TermQuery) {
- term = ((TermQuery) clause).getTerm().text();
- } else {
- term = ((TermQuery) ((BooleanQuery) clause).getClauses()[0].getQuery()).getTerm().text();
- }
- return term.split(Pattern.quote(Character.toString(fip.getFacetDelimChar())), 2)[0];
+ this.drillDownDims.putAll(drillDownDims);
}
/**
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java Wed May 22 15:51:08 2013
@@ -20,6 +20,7 @@ package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -94,6 +95,11 @@ public class DrillSideways {
BooleanClause[] clauses = in.getBooleanQuery().getClauses();
Map<String,Integer> drillDownDims = in.getDims();
+ String[] dimsByIndex = new String[drillDownDims.size()];
+ for(Map.Entry<String,Integer> ent : drillDownDims.entrySet()) {
+ dimsByIndex[ent.getValue()] = ent.getKey();
+ }
+
int startClause;
if (clauses.length == drillDownDims.size()) {
startClause = 0;
@@ -107,13 +113,15 @@ public class DrillSideways {
// baseQuery:
List<Query> nonFacetClauses = new ArrayList<Query>();
List<Query> facetClauses = new ArrayList<Query>();
+ Map<String,Integer> dimToIndex = new LinkedHashMap<String,Integer>();
for(int i=startClause;i<clauses.length;i++) {
Query q = clauses[i].getQuery();
- String dim = in.getDim(q);
+ String dim = dimsByIndex[i-startClause];
if (!facetDims.contains(dim)) {
nonFacetClauses.add(q);
} else {
facetClauses.add(q);
+ dimToIndex.put(dim, dimToIndex.size());
}
}
@@ -127,7 +135,7 @@ public class DrillSideways {
newBaseQuery.add(q, BooleanClause.Occur.MUST);
}
- return new DrillDownQuery(fsp.indexingParams, newBaseQuery, facetClauses);
+ return new DrillDownQuery(fsp.indexingParams, newBaseQuery, facetClauses, dimToIndex);
} else {
// No change:
return in;
@@ -157,6 +165,20 @@ public class DrillSideways {
return new DrillSidewaysResult(c.getFacetResults(), null);
}
+ List<FacetRequest> ddRequests = new ArrayList<FacetRequest>();
+ for(FacetRequest fr : fsp.facetRequests) {
+ assert fr.categoryPath.length > 0;
+ if (!drillDownDims.containsKey(fr.categoryPath.components[0])) {
+ ddRequests.add(fr);
+ }
+ }
+ FacetSearchParams fsp2;
+ if (!ddRequests.isEmpty()) {
+ fsp2 = new FacetSearchParams(fsp.indexingParams, ddRequests);
+ } else {
+ fsp2 = null;
+ }
+
BooleanQuery ddq = query.getBooleanQuery();
BooleanClause[] clauses = ddq.getClauses();
@@ -173,7 +195,7 @@ public class DrillSideways {
startClause = 1;
}
- FacetsCollector drillDownCollector = FacetsCollector.create(getDrillDownAccumulator(fsp));
+ FacetsCollector drillDownCollector = fsp2 == null ? null : FacetsCollector.create(getDrillDownAccumulator(fsp2));
FacetsCollector[] drillSidewaysCollectors = new FacetsCollector[drillDownDims.size()];
@@ -225,6 +247,8 @@ public class DrillSideways {
break;
}
}
+ } else {
+ useCollectorMethod = true;
}
}
}
@@ -246,6 +270,7 @@ public class DrillSideways {
List<FacetResult> mergedResults = new ArrayList<FacetResult>();
int[] requestUpto = new int[drillDownDims.size()];
+ int ddUpto = 0;
for(int i=0;i<fsp.facetRequests.size();i++) {
FacetRequest fr = fsp.facetRequests.get(i);
assert fr.categoryPath.length > 0;
@@ -260,7 +285,7 @@ public class DrillSideways {
//System.out.println("get DD results");
}
//System.out.println("add dd results " + i);
- mergedResults.add(drillDownResults.get(i));
+ mergedResults.add(drillDownResults.get(ddUpto++));
} else {
// Drill sideways dim:
int dim = dimIndex.intValue();
@@ -359,7 +384,7 @@ public class DrillSideways {
subQuery.setMinimumNumberShouldMatch(minShouldMatch);
- //System.out.println("EXE " + topQuery);
+ // System.out.println("EXE " + topQuery);
// Collects against the passed-in
// drillDown/SidewaysCollectors as a side effect:
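
The refactoring above replaces DrillDownQuery.getDim(), which re-derived each
clause's dimension by picking the query apart, with an explicit mapping: the
insertion-ordered dim -> clause-index map from the original query is inverted
into an array so clause i can be mapped back to its dimension, and a fresh
LinkedHashMap records the surviving facet dims for the rebuilt query. A
sketch of the inversion step, assuming the map values are exactly 0..size-1
(as DrillDownQuery assigns them):

    import java.util.Map;

    static String[] dimsByIndex(Map<String,Integer> drillDownDims) {
      String[] byIndex = new String[drillDownDims.size()];
      for (Map.Entry<String,Integer> ent : drillDownDims.entrySet()) {
        byIndex[ent.getValue()] = ent.getKey();
      }
      return byIndex;
    }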
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysCollector.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysCollector.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysCollector.java Wed May 22 15:51:08 2013
@@ -82,7 +82,9 @@ class DrillSidewaysCollector extends Col
// drillDown collector:
//System.out.println(" hit " + drillDownCollector);
hitCollector.collect(doc);
- drillDownCollector.collect(doc);
+ if (drillDownCollector != null) {
+ drillDownCollector.collect(doc);
+ }
// Also collect across all drill-sideways counts so
// we "merge in" drill-down counts for this
@@ -98,21 +100,28 @@ class DrillSidewaysCollector extends Col
}
} else {
+ boolean found = false;
for(int i=0;i<subScorers.length;i++) {
if (subScorers[i] == null) {
// This segment did not have any docs with this
// drill-down field & value:
- continue;
+ drillSidewaysCollectors[i].collect(doc);
+ assert allMatchesFrom(i+1, doc);
+ found = true;
+ break;
}
int subDoc = subScorers[i].docID();
- //System.out.println(" sub: " + subDoc);
+ //System.out.println(" i=" + i + " sub: " + subDoc);
if (subDoc != doc) {
+ //System.out.println(" +ds[" + i + "]");
assert subDoc > doc: "subDoc=" + subDoc + " doc=" + doc;
drillSidewaysCollectors[i].collect(doc);
assert allMatchesFrom(i+1, doc);
+ found = true;
break;
}
}
+ assert found;
}
}
@@ -134,8 +143,11 @@ class DrillSidewaysCollector extends Col
@Override
public void setNextReader(AtomicReaderContext leaf) throws IOException {
+ //System.out.println("DS.setNextReader reader=" + leaf.reader());
hitCollector.setNextReader(leaf);
- drillDownCollector.setNextReader(leaf);
+ if (drillDownCollector != null) {
+ drillDownCollector.setNextReader(leaf);
+ }
for(Collector dsc : drillSidewaysCollectors) {
dsc.setNextReader(leaf);
}
@@ -166,7 +178,9 @@ class DrillSidewaysCollector extends Col
Arrays.fill(subScorers, null);
findScorers(scorer);
hitCollector.setScorer(scorer);
- drillDownCollector.setScorer(scorer);
+ if (drillDownCollector != null) {
+ drillDownCollector.setScorer(scorer);
+ }
for(Collector dsc : drillSidewaysCollectors) {
dsc.setScorer(scorer);
}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java Wed May 22 15:51:08 2013
@@ -63,8 +63,10 @@ class DrillSidewaysScorer extends Scorer
//}
//System.out.println("score r=" + context.reader());
collector.setScorer(this);
- drillDownCollector.setScorer(this);
- drillDownCollector.setNextReader(context);
+ if (drillDownCollector != null) {
+ drillDownCollector.setScorer(this);
+ drillDownCollector.setNextReader(context);
+ }
for(DocsEnumsAndFreq dim : dims) {
dim.sidewaysCollector.setScorer(this);
dim.sidewaysCollector.setNextReader(context);
@@ -393,7 +395,9 @@ class DrillSidewaysScorer extends Scorer
//}
collector.collect(collectDocID);
- drillDownCollector.collect(collectDocID);
+ if (drillDownCollector != null) {
+ drillDownCollector.collect(collectDocID);
+ }
// TODO: we could "fix" faceting of the sideways counts
// to do this "union" (of the drill down hits) in the
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java Wed May 22 15:51:08 2013
@@ -196,7 +196,7 @@ public class FacetsAccumulator {
return res;
}
- protected boolean requiresDocScores() {
+ public boolean requiresDocScores() {
return getAggregator().requiresDocScores();
}
}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeAccumulator.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeAccumulator.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeAccumulator.java Wed May 22 15:51:08 2013
@@ -17,7 +17,10 @@ package org.apache.lucene.facet.range;
* limitations under the License.
*/
+import java.util.Collections;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
@@ -29,18 +32,30 @@ import org.apache.lucene.document.LongFi
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.FacetTestUtils;
+import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
+import org.apache.lucene.facet.search.CountFacetRequest;
import org.apache.lucene.facet.search.DrillDownQuery;
+import org.apache.lucene.facet.search.DrillSideways.DrillSidewaysResult;
+import org.apache.lucene.facet.search.DrillSideways;
+import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
+import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.FacetsCollector;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util._TestUtil;
public class TestRangeAccumulator extends FacetTestCase {
@@ -81,6 +96,114 @@ public class TestRangeAccumulator extend
d.close();
}
+ /** Tests single request that mixes Range and non-Range
+ * faceting, with DrillSideways. */
+ public void testMixedRangeAndNonRange() throws Exception {
+ Directory d = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), d);
+ Directory td = newDirectory();
+ DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode.CREATE);
+ FacetFields ff = new FacetFields(tw);
+
+ for(long l=0;l<100;l++) {
+ Document doc = new Document();
+ // For computing range facet counts:
+ doc.add(new NumericDocValuesField("field", l));
+ // For drill down by numeric range:
+ doc.add(new LongField("field", l, Field.Store.NO));
+
+ CategoryPath cp;
+ if ((l&3) == 0) {
+ cp = new CategoryPath("dim", "a");
+ } else {
+ cp = new CategoryPath("dim", "b");
+ }
+ ff.addFields(doc, Collections.singletonList(cp));
+ w.addDocument(doc);
+ }
+
+ IndexReader r = w.getReader();
+ w.close();
+
+ final TaxonomyReader tr = new DirectoryTaxonomyReader(tw);
+ tw.close();
+
+ IndexSearcher s = newSearcher(r);
+
+ final FacetSearchParams fsp = new FacetSearchParams(
+ new CountFacetRequest(new CategoryPath("dim"), 2),
+ new RangeFacetRequest<LongRange>("field",
+ new LongRange("less than 10", 0L, true, 10L, false),
+ new LongRange("less than or equal to 10", 0L, true, 10L, true),
+ new LongRange("over 90", 90L, false, 100L, false),
+ new LongRange("90 or above", 90L, true, 100L, false),
+ new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, false)));
+
+ final Set<String> dimSeen = new HashSet<String>();
+
+ DrillSideways ds = new DrillSideways(s, tr) {
+ @Override
+ protected FacetsAccumulator getDrillDownAccumulator(FacetSearchParams fsp) {
+ checkSeen(fsp);
+ return RangeFacetsAccumulatorWrapper.create(fsp, searcher.getIndexReader(), tr);
+ }
+
+ @Override
+ protected FacetsAccumulator getDrillSidewaysAccumulator(String dim, FacetSearchParams fsp) {
+ checkSeen(fsp);
+ return RangeFacetsAccumulatorWrapper.create(fsp, searcher.getIndexReader(), tr);
+ }
+
+ private void checkSeen(FacetSearchParams fsp) {
+ // Each dim should show up only once, across

+ // both drillDown and drillSideways requests:
+ for(FacetRequest fr : fsp.facetRequests) {
+ String dim = fr.categoryPath.components[0];
+ assertFalse("dim " + dim + " already seen", dimSeen.contains(dim));
+ dimSeen.add(dim);
+ }
+ }
+
+ @Override
+ protected boolean scoreSubDocsAtOnce() {
+ return random().nextBoolean();
+ }
+ };
+
+ // First search, no drill downs:
+ DrillDownQuery ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT, new MatchAllDocsQuery());
+ DrillSidewaysResult dsr = ds.search(null, ddq, 10, fsp);
+
+ assertEquals(100, dsr.hits.totalHits);
+ assertEquals(2, dsr.facetResults.size());
+ assertEquals("dim (0)\n b (75)\n a (25)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(0)));
+ assertEquals("field (0)\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(1)));
+
+ // Second search, drill down on dim=b:
+ ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT, new MatchAllDocsQuery());
+ ddq.add(new CategoryPath("dim", "b"));
+ dimSeen.clear();
+ dsr = ds.search(null, ddq, 10, fsp);
+
+ assertEquals(75, dsr.hits.totalHits);
+ assertEquals(2, dsr.facetResults.size());
+ assertEquals("dim (0)\n b (75)\n a (25)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(0)));
+ assertEquals("field (0)\n less than 10 (7)\n less than or equal to 10 (8)\n over 90 (7)\n 90 or above (8)\n over 1000 (0)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(1)));
+
+ // Third search, drill down on "less than or equal to 10":
+ ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT, new MatchAllDocsQuery());
+ ddq.add("field", NumericRangeQuery.newLongRange("field", 0L, 10L, true, true));
+ dimSeen.clear();
+ dsr = ds.search(null, ddq, 10, fsp);
+
+ assertEquals(11, dsr.hits.totalHits);
+ assertEquals(2, dsr.facetResults.size());
+ assertEquals("dim (0)\n b (8)\n a (3)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(0)));
+ assertEquals("field (0)\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(1)));
+
+ IOUtils.close(tr, td, r, d);
+ }
+
public void testBasicDouble() throws Exception {
Directory d = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), d);
Modified: lucene/dev/branches/lucene4956/lucene/module-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/module-build.xml?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/module-build.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/module-build.xml Wed May 22 15:51:08 2013
@@ -220,7 +220,29 @@
</ant>
<property name="facet-javadocs.uptodate" value="true"/>
</target>
-
+
+ <property name="replicator.jar" value="${common.dir}/build/replicator/lucene-replicator-${version}.jar"/>
+ <target name="check-replicator-uptodate" unless="replicator.uptodate">
+ <module-uptodate name="replicator" jarfile="${replicator.jar}" property="replicator.uptodate"/>
+ </target>
+ <target name="jar-replicator" unless="replicator.uptodate" depends="check-replicator-uptodate">
+ <ant dir="${common.dir}/replicator" target="jar-core" inheritall="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="replicator.uptodate" value="true"/>
+ </target>
+
+ <property name="replicator-javadoc.jar" value="${common.dir}/build/replicator/lucene-replicator-${version}-javadoc.jar"/>
+ <target name="check-replicator-javadocs-uptodate" unless="replicator-javadocs.uptodate">
+ <module-uptodate name="replicator" jarfile="${replicator-javadoc.jar}" property="replicator-javadocs.uptodate"/>
+ </target>
+ <target name="javadocs-replicator" unless="replicator-javadocs.uptodate" depends="check-replicator-javadocs-uptodate">
+ <ant dir="${common.dir}/replicator" target="javadocs" inheritAll="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="replicator-javadocs.uptodate" value="true"/>
+ </target>
+
<property name="analyzers-icu.jar" value="${common.dir}/build/analysis/icu/lucene-analyzers-icu-${version}.jar"/>
<target name="check-analyzers-icu-uptodate" unless="analyzers-icu.uptodate">
<module-uptodate name="analysis/icu" jarfile="${analyzers-icu.jar}" property="analyzers-icu.uptodate"/>
Modified: lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java Wed May 22 15:51:08 2013
@@ -58,7 +58,7 @@ public class CustomScoreQuery extends Qu
* @param subQuery the sub query whose scored is being customized. Must not be null.
*/
public CustomScoreQuery(Query subQuery) {
- this(subQuery, new Query[0]);
+ this(subQuery, new FunctionQuery[0]);
}
/**
@@ -67,9 +67,9 @@ public class CustomScoreQuery extends Qu
* @param scoringQuery a value source query whose scores are used in the custom score
* computation. This parameter is optional - it can be null.
*/
- public CustomScoreQuery(Query subQuery, Query scoringQuery) {
+ public CustomScoreQuery(Query subQuery, FunctionQuery scoringQuery) {
this(subQuery, scoringQuery!=null ? // don't want an array that contains a single null..
- new Query[] {scoringQuery} : new Query[0]);
+ new FunctionQuery[] {scoringQuery} : new FunctionQuery[0]);
}
/**
@@ -78,7 +78,7 @@ public class CustomScoreQuery extends Qu
* @param scoringQueries value source queries whose scores are used in the custom score
* computation. This parameter is optional - it can be null or even an empty array.
*/
- public CustomScoreQuery(Query subQuery, Query... scoringQueries) {
+ public CustomScoreQuery(Query subQuery, FunctionQuery... scoringQueries) {
this.subQuery = subQuery;
this.scoringQueries = scoringQueries !=null?
scoringQueries : new Query[0];
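
The constructors above are narrowed from Query to FunctionQuery for the
scoring clauses. A hedged usage sketch of the new signatures (the "boost"
numeric field is hypothetical):

    import org.apache.lucene.index.Term;
    import org.apache.lucene.queries.CustomScoreQuery;
    import org.apache.lucene.queries.function.FunctionQuery;
    import org.apache.lucene.queries.function.valuesource.LongFieldSource;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermQuery;

    static Query boostedByField() {
      Query sub = new TermQuery(new Term("body", "lucene"));
      // Scores of "sub" are combined with the per-document value of the
      // hypothetical numeric field "boost":
      FunctionQuery boost = new FunctionQuery(new LongFieldSource("boost"));
      return new CustomScoreQuery(sub, boost);
    }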
Modified: lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java Wed May 22 15:51:08 2013
@@ -19,8 +19,8 @@ package org.apache.lucene.queryparser.an
import java.io.IOException;
import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@@ -31,36 +31,29 @@ import org.apache.lucene.util.Version;
/**
* Overrides Lucene's default QueryParser so that Fuzzy-, Prefix-, Range-, and WildcardQuerys
- * are also passed through the given analyzer, but wild card characters (like <code>*</code>)
- * don't get removed from the search terms.
+ * are also passed through the given analyzer, but wildcard characters <code>*</code> and
+ * <code>?</code> don't get removed from the search terms.
*
* <p><b>Warning:</b> This class should only be used with analyzers that do not use stopwords
* or that add tokens. Also, several stemming analyzers are inappropriate: for example, GermanAnalyzer
* will turn <code>Häuser</code> into <code>hau</code>, but <code>H?user</code> will
* become <code>h?user</code> when using this parser and thus no match would be found (i.e.
* using this parser will be no improvement over QueryParser in such cases).
- *
*/
public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic.QueryParser {
-
- /**
- * Constructs a query parser.
- * @param field the default field for query terms.
- * @param analyzer used to find terms in the query text.
- */
+ // gobble escaped chars or find a wildcard character
+ private final Pattern wildcardPattern = Pattern.compile("(\\.)|([?*]+)");
public AnalyzingQueryParser(Version matchVersion, String field, Analyzer analyzer) {
super(matchVersion, field, analyzer);
setAnalyzeRangeTerms(true);
}
/**
- * Called when parser
- * parses an input term token that contains one or more wildcard
- * characters (like <code>*</code>), but is not a prefix term token (one
- * that has just a single * character at the end).
+ * Called when parser parses an input term that contains one or more wildcard
+ * characters (like <code>*</code>), but is not a prefix term (one that has
+ * just a single <code>*</code> character at the end).
* <p>
- * Example: will be called for <code>H?user</code> or for <code>H*user</code>
- * but not for <code>*user</code>.
+ * Example: will be called for <code>H?user</code> or for <code>H*user</code>.
* <p>
* Depending on analyzer and settings, a wildcard term may (most probably will)
* be lower-cased automatically. It <b>will</b> go through the default Analyzer.
@@ -68,113 +61,52 @@ public class AnalyzingQueryParser extend
* Overrides super class, by passing terms through analyzer.
*
* @param field Name of the field query will use.
- * @param termStr Term token that contains one or more wild card
+ * @param termStr Term that contains one or more wildcard
* characters (? or *), but is not simple prefix term
*
* @return Resulting {@link Query} built for the term
*/
@Override
protected Query getWildcardQuery(String field, String termStr) throws ParseException {
- List<String> tlist = new ArrayList<String>();
- List<String> wlist = new ArrayList<String>();
- /* somewhat a hack: find/store wildcard chars
- * in order to put them back after analyzing */
- boolean isWithinToken = (!termStr.startsWith("?") && !termStr.startsWith("*"));
- StringBuilder tmpBuffer = new StringBuilder();
- char[] chars = termStr.toCharArray();
- for (int i = 0; i < termStr.length(); i++) {
- if (chars[i] == '?' || chars[i] == '*') {
- if (isWithinToken) {
- tlist.add(tmpBuffer.toString());
- tmpBuffer.setLength(0);
- }
- isWithinToken = false;
- } else {
- if (!isWithinToken) {
- wlist.add(tmpBuffer.toString());
- tmpBuffer.setLength(0);
- }
- isWithinToken = true;
- }
- tmpBuffer.append(chars[i]);
- }
- if (isWithinToken) {
- tlist.add(tmpBuffer.toString());
- } else {
- wlist.add(tmpBuffer.toString());
- }
- // get Analyzer from superclass and tokenize the term
- TokenStream source;
-
- int countTokens = 0;
- try {
- source = getAnalyzer().tokenStream(field, new StringReader(termStr));
- source.reset();
- } catch (IOException e1) {
- throw new RuntimeException(e1);
+ if (termStr == null){
+ //can't imagine this would ever happen
+ throw new ParseException("Passed null value as term to getWildcardQuery");
+ }
+ if ( ! getAllowLeadingWildcard() && (termStr.startsWith("*") || termStr.startsWith("?"))) {
+ throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery"
+ + " unless getAllowLeadingWildcard() returns true");
}
- CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
- while (true) {
- try {
- if (!source.incrementToken()) break;
- } catch (IOException e) {
- break;
- }
- String term = termAtt.toString();
- if (!"".equals(term)) {
- try {
- tlist.set(countTokens++, term);
- } catch (IndexOutOfBoundsException ioobe) {
- countTokens = -1;
- }
- }
- }
- try {
- source.end();
- source.close();
- } catch (IOException e) {
- // ignore
- }
-
- if (countTokens != tlist.size()) {
- /* this means that the analyzer used either added or consumed
- * (common for a stemmer) tokens, and we can't build a WildcardQuery */
- throw new ParseException("Cannot build WildcardQuery with analyzer "
- + getAnalyzer().getClass() + " - tokens added or lost");
+
+ Matcher wildcardMatcher = wildcardPattern.matcher(termStr);
+ StringBuilder sb = new StringBuilder();
+ int last = 0;
+
+ while (wildcardMatcher.find()){
+ // continue if escaped char
+ if (wildcardMatcher.group(1) != null){
+ continue;
+ }
+
+ if (wildcardMatcher.start() > 0){
+ String chunk = termStr.substring(last, wildcardMatcher.start());
+ String analyzed = analyzeSingleChunk(field, termStr, chunk);
+ sb.append(analyzed);
+ }
+ //append the wildcard character
+ sb.append(wildcardMatcher.group(2));
+
+ last = wildcardMatcher.end();
}
-
- if (tlist.size() == 0) {
- return null;
- } else if (tlist.size() == 1) {
- if (wlist != null && wlist.size() == 1) {
- /* if wlist contains one wildcard, it must be at the end, because:
- * 1) wildcards are not allowed in 1st position of a term by QueryParser
- * 2) if wildcard was *not* in end, there would be *two* or more tokens */
- return super.getWildcardQuery(field, tlist.get(0)
- + wlist.get(0).toString());
- } else {
- /* we should never get here! if so, this method was called
- * with a termStr containing no wildcard ... */
- throw new IllegalArgumentException("getWildcardQuery called without wildcard");
- }
- } else {
- /* the term was tokenized, let's rebuild to one token
- * with wildcards put back in postion */
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < tlist.size(); i++) {
- sb.append( tlist.get(i));
- if (wlist != null && wlist.size() > i) {
- sb.append(wlist.get(i));
- }
- }
- return super.getWildcardQuery(field, sb.toString());
+ if (last < termStr.length()){
+ sb.append(analyzeSingleChunk(field, termStr, termStr.substring(last)));
}
+ return super.getWildcardQuery(field, sb.toString());
}
-
+
/**
* Called when parser parses an input term
- * token that uses prefix notation; that is, contains a single '*' wildcard
+ * that uses prefix notation; that is, contains a single '*' wildcard
* character as its last character. Since this is a special case
* of generic wildcard term, and such a query can be optimized easily,
* this usually results in a different query object.
@@ -185,52 +117,19 @@ public class AnalyzingQueryParser extend
* Overrides super class, by passing terms through analyzer.
*
* @param field Name of the field query will use.
- * @param termStr Term token to use for building term for the query
+ * @param termStr Term to use for building term for the query
* (<b>without</b> trailing '*' character!)
*
* @return Resulting {@link Query} built for the term
*/
@Override
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
- // get Analyzer from superclass and tokenize the term
- TokenStream source;
- List<String> tlist = new ArrayList<String>();
- try {
- source = getAnalyzer().tokenStream(field, new StringReader(termStr));
- source.reset();
- } catch (IOException e1) {
- throw new RuntimeException(e1);
- }
- CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
- while (true) {
- try {
- if (!source.incrementToken()) break;
- } catch (IOException e) {
- break;
- }
- tlist.add(termAtt.toString());
- }
-
- try {
- source.end();
- source.close();
- } catch (IOException e) {
- // ignore
- }
-
- if (tlist.size() == 1) {
- return super.getPrefixQuery(field, tlist.get(0));
- } else {
- /* this means that the analyzer used either added or consumed
- * (common for a stemmer) tokens, and we can't build a PrefixQuery */
- throw new ParseException("Cannot build PrefixQuery with analyzer "
- + getAnalyzer().getClass()
- + (tlist.size() > 1 ? " - token(s) added" : " - token consumed"));
- }
+ String analyzed = analyzeSingleChunk(field, termStr, termStr);
+ return super.getPrefixQuery(field, analyzed);
}
/**
- * Called when parser parses an input term token that has the fuzzy suffix (~) appended.
+ * Called when parser parses an input term that has the fuzzy suffix (~) appended.
* <p>
* Depending on analyzer and settings, a fuzzy term may (most probably will)
* be lower-cased automatically. It <b>will</b> go through the default Analyzer.
@@ -238,42 +137,73 @@ public class AnalyzingQueryParser extend
* Overrides super class, by passing terms through analyzer.
*
* @param field Name of the field query will use.
- * @param termStr Term token to use for building term for the query
+ * @param termStr Term to use for building term for the query
*
* @return Resulting {@link Query} built for the term
*/
@Override
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
throws ParseException {
- // get Analyzer from superclass and tokenize the term
- TokenStream source = null;
- String nextToken = null;
- boolean multipleTokens = false;
-
- try {
- source = getAnalyzer().tokenStream(field, new StringReader(termStr));
- CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
- source.reset();
- if (source.incrementToken()) {
- nextToken = termAtt.toString();
- }
- multipleTokens = source.incrementToken();
- } catch (IOException e) {
- nextToken = null;
- }
-
- try {
- source.end();
- source.close();
- } catch (IOException e) {
- // ignore
- }
+
+ String analyzed = analyzeSingleChunk(field, termStr, termStr);
+ return super.getFuzzyQuery(field, analyzed, minSimilarity);
+ }
- if (multipleTokens) {
- throw new ParseException("Cannot build FuzzyQuery with analyzer " + getAnalyzer().getClass()
- + " - tokens were added");
+ /**
+ * Returns the analyzed form for the given chunk
+ *
+ * If the analyzer produces more than one output token from the given chunk,
+ * a ParseException is thrown.
+ *
+ * @param field The target field
+ * @param termStr The full term from which the given chunk is excerpted
+ * @param chunk The portion of the given termStr to be analyzed
+ * @return The result of analyzing the given chunk
+ * @throws ParseException when analysis returns other than one output token
+ */
+ protected String analyzeSingleChunk(String field, String termStr, String chunk) throws ParseException{
+ String analyzed = null;
+ TokenStream stream = null;
+ try{
+ stream = getAnalyzer().tokenStream(field, new StringReader(chunk));
+ stream.reset();
+ CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
+ // get first and hopefully only output token
+ if (stream.incrementToken()) {
+ analyzed = termAtt.toString();
+
+ // try to increment again, there should only be one output token
+ StringBuilder multipleOutputs = null;
+ while (stream.incrementToken()) {
+ if (null == multipleOutputs) {
+ multipleOutputs = new StringBuilder();
+ multipleOutputs.append('"');
+ multipleOutputs.append(analyzed);
+ multipleOutputs.append('"');
+ }
+ multipleOutputs.append(',');
+ multipleOutputs.append('"');
+ multipleOutputs.append(termAtt.toString());
+ multipleOutputs.append('"');
+ }
+ stream.end();
+ stream.close();
+ if (null != multipleOutputs) {
+ throw new ParseException(
+ String.format(getLocale(),
+ "Analyzer created multiple terms for \"%s\": %s", chunk, multipleOutputs.toString()));
+ }
+ } else {
+ // nothing returned by analyzer. Was it a stop word and the user accidentally
+ // used an analyzer with stop words?
+ stream.end();
+ stream.close();
+ throw new ParseException(String.format(getLocale(), "Analyzer returned nothing for \"%s\"", chunk));
+ }
+ } catch (IOException e){
+ throw new ParseException(
+ String.format(getLocale(), "IO error while trying to analyze single term: \"%s\"", termStr));
}
-
- return (nextToken == null) ? null : super.getFuzzyQuery(field, nextToken, minSimilarity);
+ return analyzed;
}
}
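
The rewritten getWildcardQuery() walks the term with wildcardPattern,
analyzes each literal chunk via analyzeSingleChunk(), and stitches the
wildcard runs back in unchanged. A sketch of just the chunking walk (the
escape-handling branch of the pattern is omitted here for brevity):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    static void printChunks(String termStr) {
      Pattern wildcards = Pattern.compile("([?*]+)");
      Matcher m = wildcards.matcher(termStr);
      int last = 0;
      while (m.find()) {
        if (m.start() > last) {
          System.out.println("analyze chunk: " + termStr.substring(last, m.start()));
        }
        System.out.println("keep wildcard: " + m.group(1));
        last = m.end();
      }
      if (last < termStr.length()) {
        System.out.println("analyze chunk: " + termStr.substring(last));
      }
    }

    // printChunks("H*user") -> analyze chunk: H / keep wildcard: * / analyze chunk: user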
Modified: lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java Wed May 22 15:51:08 2013
@@ -572,24 +572,53 @@ public abstract class QueryParserBase im
if (severalTokensAtSamePosition || (!quoted && !autoGeneratePhraseQueries)) {
if (positionCount == 1 || (!quoted && !autoGeneratePhraseQueries)) {
// no phrase query:
- BooleanQuery q = newBooleanQuery(positionCount == 1);
-
- BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR ?
- BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
-
- for (int i = 0; i < numTokens; i++) {
- try {
- boolean hasNext = buffer.incrementToken();
- assert hasNext == true;
- termAtt.fillBytesRef();
- } catch (IOException e) {
- // safe to ignore, because we know the number of tokens
+
+ if (positionCount == 1) {
+ // simple case: only one position, with synonyms
+ BooleanQuery q = newBooleanQuery(true);
+ for (int i = 0; i < numTokens; i++) {
+ try {
+ boolean hasNext = buffer.incrementToken();
+ assert hasNext == true;
+ termAtt.fillBytesRef();
+ } catch (IOException e) {
+ // safe to ignore, because we know the number of tokens
+ }
+ Query currentQuery = newTermQuery(
+ new Term(field, BytesRef.deepCopyOf(bytes)));
+ q.add(currentQuery, BooleanClause.Occur.SHOULD);
+ }
+ return q;
+ } else {
+ // multiple positions
+ BooleanQuery q = newBooleanQuery(false);
+ final BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
+ Query currentQuery = null;
+ for (int i = 0; i < numTokens; i++) {
+ try {
+ boolean hasNext = buffer.incrementToken();
+ assert hasNext == true;
+ termAtt.fillBytesRef();
+ } catch (IOException e) {
+ // safe to ignore, because we know the number of tokens
+ }
+ if (posIncrAtt != null && posIncrAtt.getPositionIncrement() == 0) {
+ if (!(currentQuery instanceof BooleanQuery)) {
+ Query t = currentQuery;
+ currentQuery = newBooleanQuery(true);
+ ((BooleanQuery)currentQuery).add(t, BooleanClause.Occur.SHOULD);
+ }
+ ((BooleanQuery)currentQuery).add(newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes))), BooleanClause.Occur.SHOULD);
+ } else {
+ if (currentQuery != null) {
+ q.add(currentQuery, occur);
+ }
+ currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
+ }
}
- Query currentQuery = newTermQuery(
- new Term(field, BytesRef.deepCopyOf(bytes)));
q.add(currentQuery, occur);
+ return q;
}
- return q;
}
else {
// phrase query:
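
The restructured branch above distinguishes two cases: a single position with
synonyms becomes one disjunction, while multiple positions group any
zero-position-increment tokens into a nested disjunction per position. For
example, assuming an analyzer that emits "quick" as a zero-increment synonym
of "fast" followed by "car", and AND as the default operator, the result has
the shape +(fast quick) +car. A sketch that builds that shape directly:

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.BooleanClause.Occur;
    import org.apache.lucene.search.BooleanQuery;
    import org.apache.lucene.search.TermQuery;

    static BooleanQuery synonymShape() {
      // Inner disjunction for the tokens sharing one position
      // (coord disabled, as newBooleanQuery(true) does):
      BooleanQuery position1 = new BooleanQuery(true);
      position1.add(new TermQuery(new Term("f", "fast")), Occur.SHOULD);
      position1.add(new TermQuery(new Term("f", "quick")), Occur.SHOULD);

      BooleanQuery q = new BooleanQuery(false);
      q.add(position1, Occur.MUST);
      q.add(new TermQuery(new Term("f", "car")), Occur.MUST);
      return q;
    }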
Modified: lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/CommonQueryParserConfiguration.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/CommonQueryParserConfiguration.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/CommonQueryParserConfiguration.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/CommonQueryParserConfiguration.java Wed May 22 15:51:08 2013
@@ -33,13 +33,9 @@ import org.apache.lucene.search.MultiTer
public interface CommonQueryParserConfiguration {
/**
- * Set to <code>true</code> to allow leading wildcard characters.
- * <p>
- * When set, <code>*</code> or <code>?</code> are allowed as the first
- * character of a PrefixQuery and WildcardQuery. Note that this can produce
- * very slow queries on big indexes.
- * <p>
- * Default: false.
+ * Whether terms of multi-term queries (e.g., wildcard,
+ * prefix, fuzzy and range) should be automatically
+ * lower-cased or not. Default is <code>true</code>.
*/
public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms);
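
The corrected javadoc now matches what the setter actually controls: whether
terms of multi-term queries are lower-cased before the query is built. A
brief usage sketch with the flexible StandardQueryParser (any
CommonQueryParserConfiguration implementation behaves the same way):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;

    static StandardQueryParser caseSensitiveExpansion(Analyzer analyzer) {
      StandardQueryParser parser = new StandardQueryParser(analyzer);
      // Keep the user's original case in wildcard/prefix/fuzzy/range terms
      // (they are lower-cased by default):
      parser.setLowercaseExpandedTerms(false);
      return parser;
    }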
Modified: lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/config/StandardQueryConfigHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/config/StandardQueryConfigHandler.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/config/StandardQueryConfigHandler.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/config/StandardQueryConfigHandler.java Wed May 22 15:51:08 2013
@@ -58,7 +58,7 @@ public class StandardQueryConfigHandler
final public static ConfigurationKey<Boolean> ENABLE_POSITION_INCREMENTS = ConfigurationKey.newInstance();
/**
- * Key used to set whether expanded terms should be expanded
+ * Key used to set whether expanded terms should be lower-cased
*
* @see StandardQueryParser#setLowercaseExpandedTerms(boolean)
* @see StandardQueryParser#getLowercaseExpandedTerms()
Modified: lucene/dev/branches/lucene4956/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java Wed May 22 15:51:08 2013
@@ -19,8 +19,17 @@ package org.apache.lucene.queryparser.an
import java.io.IOException;
import java.io.Reader;
+import java.util.Map;
+import java.util.TreeMap;
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockBytesAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -33,11 +42,14 @@ import org.apache.lucene.search.IndexSea
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
/**
*/
+@SuppressCodecs("Lucene3x") // binary terms
public class TestAnalyzingQueryParser extends LuceneTestCase {
-
+ private final static String FIELD = "field";
+
private Analyzer a;
private String[] wildcardInput;
@@ -49,12 +61,15 @@ public class TestAnalyzingQueryParser ex
private String[] fuzzyInput;
private String[] fuzzyExpected;
+ private Map<String, String> wildcardEscapeHits = new TreeMap<String, String>();
+ private Map<String, String> wildcardEscapeMisses = new TreeMap<String, String>();
+
@Override
public void setUp() throws Exception {
super.setUp();
- wildcardInput = new String[] { "übersetzung über*ung",
+ wildcardInput = new String[] { "*bersetzung über*ung",
"Mötley Cr\u00fce Mötl?* Crü?", "Renée Zellweger Ren?? Zellw?ger" };
- wildcardExpected = new String[] { "ubersetzung uber*ung", "motley crue motl?* cru?",
+ wildcardExpected = new String[] { "*bersetzung uber*ung", "motley crue motl?* cru?",
"renee zellweger ren?? zellw?ger" };
prefixInput = new String[] { "übersetzung übersetz*",
@@ -71,43 +86,138 @@ public class TestAnalyzingQueryParser ex
fuzzyExpected = new String[] { "ubersetzung ubersetzung~1",
"motley crue motley~1 crue~2", "renee zellweger renee~0 zellweger~2" };
+ wildcardEscapeHits.put("mö*tley", "moatley");
+
+ // need to have at least one genuine wildcard to trigger the wildcard analysis
+ // hence the * before the y
+ wildcardEscapeHits.put("mö\\*tl*y", "mo*tley");
+
+ // escaped backslash then true wildcard
+ wildcardEscapeHits.put("mö\\\\*tley", "mo\\atley");
+
+ // escaped wildcard then true wildcard
+ wildcardEscapeHits.put("mö\\??ley", "mo?tley");
+
+ // the first is an escaped * which should yield a miss
+ wildcardEscapeMisses.put("mö\\*tl*y", "moatley");
+
a = new ASCIIAnalyzer();
}
+ public void testSingleChunkExceptions() {
+ boolean ex = false;
+ String termStr = "the*tre";
+
+ Analyzer stopsAnalyzer = new MockAnalyzer
+ (random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
+ try {
+ String q = parseWithAnalyzingQueryParser(termStr, stopsAnalyzer, true);
+ } catch (ParseException e){
+ if (e.getMessage().contains("returned nothing")){
+ ex = true;
+ }
+ }
+ assertEquals("Should have returned nothing", true, ex);
+ ex = false;
+
+ AnalyzingQueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, a);
+ try{
+ qp.analyzeSingleChunk(FIELD, "", "not a single chunk");
+ } catch (ParseException e){
+ if (e.getMessage().contains("multiple terms")){
+ ex = true;
+ }
+ }
+ assertEquals("Should have produced multiple terms", true, ex);
+ }
+
+ public void testWildcardAlone() throws ParseException {
+ //seems like crazy edge case, but can be useful in concordance
+ boolean pex = false;
+ try{
+ Query q = getAnalyzedQuery("*", a, false);
+ } catch (ParseException e){
+ pex = true;
+ }
+ assertEquals("Wildcard alone with allowWildcard=false", true, pex);
+
+ pex = false;
+ try {
+ String qString = parseWithAnalyzingQueryParser("*", a, true);
+ assertEquals("Every word", "*", qString);
+ } catch (ParseException e){
+ pex = true;
+ }
+
+ assertEquals("Wildcard alone with allowWildcard=true", false, pex);
+
+ }
+ public void testWildCardEscapes() throws ParseException, IOException {
+
+ for (Map.Entry<String, String> entry : wildcardEscapeHits.entrySet()){
+ Query q = getAnalyzedQuery(entry.getKey(), a, false);
+ assertEquals("WildcardEscapeHits: " + entry.getKey(), true, isAHit(q, entry.getValue(), a));
+ }
+ for (Map.Entry<String, String> entry : wildcardEscapeMisses.entrySet()){
+ Query q = getAnalyzedQuery(entry.getKey(), a, false);
+ assertEquals("WildcardEscapeMisses: " + entry.getKey(), false, isAHit(q, entry.getValue(), a));
+ }
+
+ }
+ public void testWildCardQueryNoLeadingAllowed() {
+ boolean ex = false;
+ try{
+ String q = parseWithAnalyzingQueryParser(wildcardInput[0], a, false);
+
+ } catch (ParseException e){
+ ex = true;
+ }
+ assertEquals("Testing initial wildcard not allowed",
+ true, ex);
+ }
+
public void testWildCardQuery() throws ParseException {
for (int i = 0; i < wildcardInput.length; i++) {
assertEquals("Testing wildcards with analyzer " + a.getClass() + ", input string: "
- + wildcardInput[i], wildcardExpected[i], parseWithAnalyzingQueryParser(wildcardInput[i], a));
+ + wildcardInput[i], wildcardExpected[i], parseWithAnalyzingQueryParser(wildcardInput[i], a, true));
}
}
+
public void testPrefixQuery() throws ParseException {
for (int i = 0; i < prefixInput.length; i++) {
assertEquals("Testing prefixes with analyzer " + a.getClass() + ", input string: "
- + prefixInput[i], prefixExpected[i], parseWithAnalyzingQueryParser(prefixInput[i], a));
+ + prefixInput[i], prefixExpected[i], parseWithAnalyzingQueryParser(prefixInput[i], a, false));
}
}
public void testRangeQuery() throws ParseException {
for (int i = 0; i < rangeInput.length; i++) {
assertEquals("Testing ranges with analyzer " + a.getClass() + ", input string: "
- + rangeInput[i], rangeExpected[i], parseWithAnalyzingQueryParser(rangeInput[i], a));
+ + rangeInput[i], rangeExpected[i], parseWithAnalyzingQueryParser(rangeInput[i], a, false));
}
}
public void testFuzzyQuery() throws ParseException {
for (int i = 0; i < fuzzyInput.length; i++) {
assertEquals("Testing fuzzys with analyzer " + a.getClass() + ", input string: "
- + fuzzyInput[i], fuzzyExpected[i], parseWithAnalyzingQueryParser(fuzzyInput[i], a));
+ + fuzzyInput[i], fuzzyExpected[i], parseWithAnalyzingQueryParser(fuzzyInput[i], a, false));
}
}
- private String parseWithAnalyzingQueryParser(String s, Analyzer a) throws ParseException {
- AnalyzingQueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, "field", a);
+
+ private String parseWithAnalyzingQueryParser(String s, Analyzer a, boolean allowLeadingWildcard) throws ParseException {
+ Query q = getAnalyzedQuery(s, a, allowLeadingWildcard);
+ return q.toString(FIELD);
+ }
+
+ private Query getAnalyzedQuery(String s, Analyzer a, boolean allowLeadingWildcard) throws ParseException {
+ AnalyzingQueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, a);
+ qp.setAllowLeadingWildcard(allowLeadingWildcard);
org.apache.lucene.search.Query q = qp.parse(s);
- return q.toString("field");
+ return q;
}
-
+
final static class FoldingFilter extends TokenFilter {
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@@ -144,31 +254,45 @@ public class TestAnalyzingQueryParser ex
final static class ASCIIAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+ Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
return new TokenStreamComponents(result, new FoldingFilter(result));
}
}
-
+
+
// LUCENE-4176
public void testByteTerms() throws Exception {
- Directory ramDir = newDirectory();
+ String s = "เข";
Analyzer analyzer = new MockBytesAnalyzer();
+ QueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, analyzer);
+ Query q = qp.parse("[à¹à¸ TO à¹à¸]");
+ assertEquals(true, isAHit(q, s, analyzer));
+ }
+
+
+ private boolean isAHit(Query q, String content, Analyzer analyzer) throws IOException{
+ Directory ramDir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), ramDir, analyzer);
Document doc = new Document();
FieldType fieldType = new FieldType();
fieldType.setIndexed(true);
fieldType.setTokenized(true);
fieldType.setStored(true);
- Field field = new Field("content","เข", fieldType);
+ Field field = new Field(FIELD, content, fieldType);
doc.add(field);
writer.addDocument(doc);
writer.close();
DirectoryReader ir = DirectoryReader.open(ramDir);
- IndexSearcher is = newSearcher(ir);
- QueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, "content", analyzer);
- Query q = qp.parse("[à¹à¸ TO à¹à¸]");
- assertEquals(1, is.search(q, 10).totalHits);
+ IndexSearcher is = new IndexSearcher(ir);
+
+ int hits = is.search(q, 10).totalHits;
ir.close();
ramDir.close();
+ if (hits == 1){
+ return true;
+ } else {
+ return false;
+ }
+
}
}
\ No newline at end of file
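The wildcardEscapeHits/wildcardEscapeMisses pairs added above pin down the escape
rules the new tests exercise: a backslash-escaped * or ? survives as a literal
character in the term, and (per the in-test comment) at least one genuine wildcard
must remain to trigger the wildcard analysis. A minimal sketch of parsing such a
term (the analyzer, version constant and field name are illustrative; the test
itself uses its ASCII-folding ASCIIAnalyzer):

    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.queryparser.analyzing.AnalyzingQueryParser;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.util.Version;

    public class WildcardEscapeDemo {
      public static void main(String[] args) throws Exception {
        AnalyzingQueryParser qp = new AnalyzingQueryParser(
            Version.LUCENE_43, "field", new WhitespaceAnalyzer(Version.LUCENE_43));
        // Java literal for the query mö\*tl*y: the first '*' is escaped and
        // stays a literal star in the term; only the second '*' acts as a
        // wildcard (the wildcardEscapeHits entry that matches "mo*tley" but,
        // per wildcardEscapeMisses, not "moatley").
        Query q = qp.parse("mö\\*tl*y");
        System.out.println(q);   // prints the resulting wildcard query
      }
    }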
Modified: lucene/dev/branches/lucene4956/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java Wed May 22 15:51:08 2013
@@ -17,9 +17,17 @@ package org.apache.lucene.queryparser.cl
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.Reader;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
@@ -27,6 +35,7 @@ import org.apache.lucene.queryparser.fle
import org.apache.lucene.queryparser.util.QueryParserTestBase;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
@@ -307,4 +316,178 @@ public class TestQueryParser extends Que
assertEquals(unexpanded, smart.parse("\"dogs\""));
}
+ // TODO: fold these into QueryParserTestBase
+
+ /** adds synonym of "dog" for "dogs". */
+ static class MockSynonymAnalyzer extends Analyzer {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ MockTokenizer tokenizer = new MockTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer));
+ }
+ }
+
+ /** simple synonyms test */
+ public void testSynonyms() throws Exception {
+ BooleanQuery expected = new BooleanQuery(true);
+ expected.add(new TermQuery(new Term("field", "dogs")), BooleanClause.Occur.SHOULD);
+ expected.add(new TermQuery(new Term("field", "dog")), BooleanClause.Occur.SHOULD);
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockSynonymAnalyzer());
+ assertEquals(expected, qp.parse("dogs"));
+ assertEquals(expected, qp.parse("\"dogs\""));
+ qp.setDefaultOperator(Operator.AND);
+ assertEquals(expected, qp.parse("dogs"));
+ assertEquals(expected, qp.parse("\"dogs\""));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("dogs^2"));
+ assertEquals(expected, qp.parse("\"dogs\"^2"));
+ }
+
+ /** forms multiphrase query */
+ public void testSynonymsPhrase() throws Exception {
+ MultiPhraseQuery expected = new MultiPhraseQuery();
+ expected.add(new Term("field", "old"));
+ expected.add(new Term[] { new Term("field", "dogs"), new Term("field", "dog") });
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockSynonymAnalyzer());
+ assertEquals(expected, qp.parse("\"old dogs\""));
+ qp.setDefaultOperator(Operator.AND);
+ assertEquals(expected, qp.parse("\"old dogs\""));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("\"old dogs\"^2"));
+ expected.setSlop(3);
+ assertEquals(expected, qp.parse("\"old dogs\"~3^2"));
+ }
+
+ /**
+ * adds synonym of "國" for "国".
+ */
+ protected static class MockCJKSynonymFilter extends TokenFilter {
+ CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+ boolean addSynonym = false;
+
+ public MockCJKSynonymFilter(TokenStream input) {
+ super(input);
+ }
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ if (addSynonym) { // inject our synonym
+ clearAttributes();
+ termAtt.setEmpty().append("國");
+ posIncAtt.setPositionIncrement(0);
+ addSynonym = false;
+ return true;
+ }
+
+ if (input.incrementToken()) {
+ addSynonym = termAtt.toString().equals("国");
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+
+ static class MockCJKSynonymAnalyzer extends Analyzer {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new SimpleCJKTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new MockCJKSynonymFilter(tokenizer));
+ }
+ }
+
+ /** simple CJK synonym test */
+ public void testCJKSynonym() throws Exception {
+ BooleanQuery expected = new BooleanQuery(true);
+ expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
+ expected.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD);
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
+ assertEquals(expected, qp.parse("国"));
+ qp.setDefaultOperator(Operator.AND);
+ assertEquals(expected, qp.parse("国"));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("国^2"));
+ }
+
+ /** synonyms with default OR operator */
+ public void testCJKSynonymsOR() throws Exception {
+ BooleanQuery expected = new BooleanQuery();
+ expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);
+ BooleanQuery inner = new BooleanQuery(true);
+ inner.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
+ inner.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD);
+ expected.add(inner, BooleanClause.Occur.SHOULD);
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
+ assertEquals(expected, qp.parse("中国"));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("中国^2"));
+ }
+
+ /** more complex synonyms with default OR operator */
+ public void testCJKSynonymsOR2() throws Exception {
+ BooleanQuery expected = new BooleanQuery();
+ expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);
+ BooleanQuery inner = new BooleanQuery(true);
+ inner.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
+ inner.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD);
+ expected.add(inner, BooleanClause.Occur.SHOULD);
+ BooleanQuery inner2 = new BooleanQuery(true);
+ inner2.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
+ inner2.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD);
+ expected.add(inner2, BooleanClause.Occur.SHOULD);
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
+ assertEquals(expected, qp.parse("中国国"));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("中国国^2"));
+ }
+
+ /** synonyms with default AND operator */
+ public void testCJKSynonymsAND() throws Exception {
+ BooleanQuery expected = new BooleanQuery();
+ expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.MUST);
+ BooleanQuery inner = new BooleanQuery(true);
+ inner.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
+ inner.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD);
+ expected.add(inner, BooleanClause.Occur.MUST);
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
+ qp.setDefaultOperator(Operator.AND);
+ assertEquals(expected, qp.parse("中国"));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("中国^2"));
+ }
+
+ /** more complex synonyms with default AND operator */
+ public void testCJKSynonymsAND2() throws Exception {
+ BooleanQuery expected = new BooleanQuery();
+ expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.MUST);
+ BooleanQuery inner = new BooleanQuery(true);
+ inner.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
+ inner.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD);
+ expected.add(inner, BooleanClause.Occur.MUST);
+ BooleanQuery inner2 = new BooleanQuery(true);
+ inner2.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
+ inner2.add(new TermQuery(new Term("field", "國")), BooleanClause.Occur.SHOULD);
+ expected.add(inner2, BooleanClause.Occur.MUST);
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
+ qp.setDefaultOperator(Operator.AND);
+ assertEquals(expected, qp.parse("中国国"));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("中国国^2"));
+ }
+
+ /** forms multiphrase query */
+ public void testCJKSynonymsPhrase() throws Exception {
+ MultiPhraseQuery expected = new MultiPhraseQuery();
+ expected.add(new Term("field", "中"));
+ expected.add(new Term[] { new Term("field", "国"), new Term("field", "國")});
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
+ qp.setDefaultOperator(Operator.AND);
+ assertEquals(expected, qp.parse("\"中国\""));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("\"中国\"^2"));
+ expected.setSlop(3);
+ assertEquals(expected, qp.parse("\"中国\"~3^2"));
+ }
+
}
Modified: lucene/dev/branches/lucene4956/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java (original)
+++ lucene/dev/branches/lucene4956/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java Wed May 22 15:51:08 2013
@@ -236,7 +236,7 @@ public abstract class QueryParserTestBas
}
//individual CJK chars as terms, like StandardAnalyzer
- private class SimpleCJKTokenizer extends Tokenizer {
+ protected static class SimpleCJKTokenizer extends Tokenizer {
private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
public SimpleCJKTokenizer(Reader input) {
@@ -244,7 +244,7 @@ public abstract class QueryParserTestBas
}
@Override
- public boolean incrementToken() throws IOException {
+ public final boolean incrementToken() throws IOException {
int ch = input.read();
if (ch < 0)
return false;
@@ -1088,7 +1088,7 @@ public abstract class QueryParserTestBas
/**
* adds synonym of "dog" for "dogs".
*/
- private class MockSynonymFilter extends TokenFilter {
+ protected static class MockSynonymFilter extends TokenFilter {
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
boolean addSynonym = false;
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/ivy.xml?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/ivy.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/ivy.xml Wed May 22 15:51:08 2013
@@ -32,8 +32,8 @@
<dependency org="org.apache.ant" name="ant" rev="1.8.2" transitive="false" />
<dependency org="junit" name="junit" rev="4.10" transitive="false" conf="default->*;junit4-stdalone->*" />
- <dependency org="com.carrotsearch.randomizedtesting" name="junit4-ant" rev="2.0.9" transitive="false" conf="default->*;junit4-stdalone->*" />
- <dependency org="com.carrotsearch.randomizedtesting" name="randomizedtesting-runner" rev="2.0.9" transitive="false" conf="default->*;junit4-stdalone->*" />
+ <dependency org="com.carrotsearch.randomizedtesting" name="junit4-ant" rev="2.0.10" transitive="false" conf="default->*;junit4-stdalone->*" />
+ <dependency org="com.carrotsearch.randomizedtesting" name="randomizedtesting-runner" rev="2.0.10" transitive="false" conf="default->*;junit4-stdalone->*" />
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
Modified: lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java?rev=1485259&r1=1485258&r2=1485259&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java (original)
+++ lucene/dev/branches/lucene4956/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java Wed May 22 15:51:08 2013
@@ -53,6 +53,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.store.MockDirectoryWrapper.Throttling;
import org.apache.lucene.util.BytesRef;
@@ -594,7 +595,9 @@ public abstract class BaseStoredFieldsFo
public void testBigDocuments() throws IOException {
// "big" as "much bigger than the chunk size"
// for this test we force a FS dir
- Directory dir = newFSDirectory(_TestUtil.getTempDir(getClass().getSimpleName()));
+ // we can't just use newFSDirectory, because this test doesn't really index anything.
+ // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
+ Directory dir = new MockDirectoryWrapper(random(), new MMapDirectory(_TestUtil.getTempDir("testBigDocuments")));
IndexWriterConfig iwConf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);