You are viewing a plain text version of this content. The canonical link for it is available in the original (HTML) mailing-list archive; the hyperlink was lost in the plain-text conversion.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/02/06 01:58:31 UTC
svn commit: r1442821 [1/2] - in /lucene/dev/branches/lucene4547: ./
dev-tools/ dev-tools/maven/lucene/highlighter/ lucene/ lucene/analysis/
lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/
lucene/analysis/uima/src/test-files/uima/ lucene/...
Author: rmuir
Date: Wed Feb 6 00:58:30 2013
New Revision: 1442821
URL: http://svn.apache.org/viewvc?rev=1442821&view=rev
Log:
Merged /lucene/dev/trunk:r1441770-1442810
Removed:
lucene/dev/branches/lucene4547/lucene/licenses/asm-debug-all-4.1.jar.sha1
lucene/dev/branches/lucene4547/lucene/licenses/asm-debug-all-LICENSE-BSD_LIKE.txt
lucene/dev/branches/lucene4547/lucene/licenses/asm-debug-all-NOTICE.txt
lucene/dev/branches/lucene4547/lucene/tools/forbiddenApis/commons-io.txt
lucene/dev/branches/lucene4547/lucene/tools/forbiddenApis/jdk-deprecated.txt
lucene/dev/branches/lucene4547/lucene/tools/forbiddenApis/jdk.txt
lucene/dev/branches/lucene4547/lucene/tools/forbiddenApis/system-out.txt
lucene/dev/branches/lucene4547/lucene/tools/lib/
lucene/dev/branches/lucene4547/lucene/tools/src/java/org/apache/lucene/validation/ForbiddenApisCheckTask.java
Modified:
lucene/dev/branches/lucene4547/ (props changed)
lucene/dev/branches/lucene4547/dev-tools/ (props changed)
lucene/dev/branches/lucene4547/dev-tools/maven/lucene/highlighter/pom.xml.template
lucene/dev/branches/lucene4547/lucene/ (props changed)
lucene/dev/branches/lucene4547/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/lucene4547/lucene/analysis/ (props changed)
lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java
lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java
lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java
lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java
lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
lucene/dev/branches/lucene4547/lucene/benchmark/ (props changed)
lucene/dev/branches/lucene4547/lucene/benchmark/build.xml
lucene/dev/branches/lucene4547/lucene/build.xml (contents, props changed)
lucene/dev/branches/lucene4547/lucene/common-build.xml (contents, props changed)
lucene/dev/branches/lucene4547/lucene/core/ (props changed)
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCache.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java
lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java
lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/TestSort.java
lucene/dev/branches/lucene4547/lucene/highlighter/ (props changed)
lucene/dev/branches/lucene4547/lucene/highlighter/build.xml
lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
lucene/dev/branches/lucene4547/lucene/licenses/ (props changed)
lucene/dev/branches/lucene4547/lucene/memory/ (props changed)
lucene/dev/branches/lucene4547/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
lucene/dev/branches/lucene4547/lucene/queryparser/ (props changed)
lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java
lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/package.html
lucene/dev/branches/lucene4547/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
lucene/dev/branches/lucene4547/lucene/test-framework/ (props changed)
lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java
lucene/dev/branches/lucene4547/lucene/tools/ (props changed)
lucene/dev/branches/lucene4547/lucene/tools/build.xml
lucene/dev/branches/lucene4547/lucene/tools/custom-tasks.xml
lucene/dev/branches/lucene4547/lucene/tools/ivy.xml
lucene/dev/branches/lucene4547/lucene/tools/src/java/lucene-solr.antlib.xml
lucene/dev/branches/lucene4547/solr/ (props changed)
lucene/dev/branches/lucene4547/solr/build.xml (contents, props changed)
lucene/dev/branches/lucene4547/solr/common-build.xml (contents, props changed)
lucene/dev/branches/lucene4547/solr/contrib/ (props changed)
lucene/dev/branches/lucene4547/solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml
lucene/dev/branches/lucene4547/solr/core/ (props changed)
lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/cloud/ZkController.java
lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/handler/SnapPuller.java
lucene/dev/branches/lucene4547/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
lucene/dev/branches/lucene4547/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java
lucene/dev/branches/lucene4547/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java
lucene/dev/branches/lucene4547/solr/solrj/ (props changed)
lucene/dev/branches/lucene4547/solr/solrj/ivy.xml
Modified: lucene/dev/branches/lucene4547/dev-tools/maven/lucene/highlighter/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/dev-tools/maven/lucene/highlighter/pom.xml.template?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/dev-tools/maven/lucene/highlighter/pom.xml.template (original)
+++ lucene/dev/branches/lucene4547/dev-tools/maven/lucene/highlighter/pom.xml.template Wed Feb 6 00:58:30 2013
@@ -61,6 +61,11 @@
<artifactId>lucene-memory</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>lucene-queries</artifactId>
+ <version>${project.version}</version>
+ </dependency>
</dependencies>
<build>
<sourceDirectory>${module-path}/src/java</sourceDirectory>
Modified: lucene/dev/branches/lucene4547/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/CHANGES.txt?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene4547/lucene/CHANGES.txt Wed Feb 6 00:58:30 2013
@@ -94,6 +94,12 @@ New Features
* LUCENE-4723: Add AnalyzerFactoryTask to benchmark, and enable analyzer
creation via the resulting factories using NewAnalyzerTask. (Steve Rowe)
+* LUCENE-4728: Unknown and not explicitly mapped queries are now rewritten
+ against the highlighting IndexReader to obtain primitive queries before
+ discarding the query entirely. WeightedSpanTermExtractor now builds a
+ MemoryIndex only once even if multiple fields are highlighted.
+ (Simon Willnauer)
+
API Changes
* LUCENE-4709: FacetResultNode no longer has a residue field. (Shai Erera)
@@ -133,10 +139,18 @@ Bug Fixes
* LUCENE-4739: Fixed bugs that prevented FSTs more than ~1.1GB from
being saved and loaded (Adrien Grand, Mike McCandless)
+Documentation
+
+* LUCENE-4718: Fixed documentation of oal.queryparser.classic.
+ (Hayden Muhl via Adrien Grand)
+
Build
* LUCENE-4636: Upgrade ivy to 2.3.0 (Shawn Heisey via Robert Muir)
+* LUCENE-4570: Use the Policeman Forbidden API checker, released separately
+ from Lucene and downloaded via Ivy. (Uwe Schindler, Robert Muir)
+
======================= Lucene 4.1.0 =======================
Changes in backwards compatibility policy
Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java Wed Feb 6 00:58:30 2013
@@ -28,6 +28,8 @@ import org.apache.uima.resource.Resource
import java.io.IOException;
import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
/**
* Abstract base implementation of a {@link Tokenizer} which is able to analyze the given input with a
@@ -39,10 +41,10 @@ public abstract class BaseUIMATokenizer
protected final AnalysisEngine ae;
protected final CAS cas;
- protected BaseUIMATokenizer(Reader reader, String descriptorPath) {
+ protected BaseUIMATokenizer(Reader reader, String descriptorPath, Map<String, Object> configurationParameters) {
super(reader);
try {
- ae = AEProviderFactory.getInstance().getAEProvider(descriptorPath).getAE();
+ ae = AEProviderFactory.getInstance().getAEProvider(null, descriptorPath, configurationParameters).getAE();
cas = ae.newCAS();
} catch (ResourceInitializationException e) {
throw new RuntimeException(e);
Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java Wed Feb 6 00:58:30 2013
@@ -26,6 +26,7 @@ import org.apache.uima.cas.text.Annotati
import java.io.IOException;
import java.io.Reader;
+import java.util.Map;
/**
* a {@link Tokenizer} which creates tokens from UIMA Annotations
@@ -40,8 +41,8 @@ public final class UIMAAnnotationsTokeni
private int finalOffset = 0;
- public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Reader input) {
- super(input, descriptorPath);
+ public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters, Reader input) {
+ super(input, descriptorPath, configurationParameters);
this.tokenTypeString = tokenType;
this.termAttr = addAttribute(CharTermAttribute.class);
this.offsetAttr = addAttribute(OffsetAttribute.class);
Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java Wed Feb 6 00:58:30 2013
@@ -22,6 +22,7 @@ import org.apache.lucene.analysis.util.T
import org.apache.lucene.analysis.uima.UIMAAnnotationsTokenizer;
import java.io.Reader;
+import java.util.HashMap;
import java.util.Map;
/**
@@ -31,19 +32,29 @@ public class UIMAAnnotationsTokenizerFac
private String descriptorPath;
private String tokenType;
+ private Map<String, Object> configurationParameters;
@Override
public void init(Map<String, String> args) {
super.init(args);
- descriptorPath = args.get("descriptorPath");
- tokenType = args.get("tokenType");
- if (descriptorPath == null || tokenType == null) {
- throw new IllegalArgumentException("Both descriptorPath and tokenType are mandatory");
+ configurationParameters = new HashMap<String, Object>();
+ for (String k : args.keySet()) {
+ if (k.equals("tokenType")) {
+ tokenType = args.get("tokenType");
+ } else if (k.equals("descriptorPath")) {
+ descriptorPath = args.get("descriptorPath");
+ } else {
+ configurationParameters.put(k, args.get(k));
+ }
}
+ if (descriptorPath == null || tokenType == null ) {
+ throw new IllegalArgumentException("descriptorPath and tokenType are mandatory");
+ }
+
}
@Override
public Tokenizer create(Reader input) {
- return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, input);
+ return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, input);
}
}
Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java Wed Feb 6 00:58:30 2013
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.uima;
import org.apache.lucene.analysis.Analyzer;
import java.io.Reader;
+import java.util.Map;
/**
* An {@link Analyzer} which use the {@link UIMAAnnotationsTokenizer} for creating tokens
@@ -28,15 +29,17 @@ public final class UIMABaseAnalyzer exte
private final String descriptorPath;
private final String tokenType;
+ private final Map<String, Object> configurationParameters;
- public UIMABaseAnalyzer(String descriptorPath, String tokenType) {
+ public UIMABaseAnalyzer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters) {
this.descriptorPath = descriptorPath;
this.tokenType = tokenType;
+ this.configurationParameters = configurationParameters;
}
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- return new TokenStreamComponents(new UIMAAnnotationsTokenizer(descriptorPath, tokenType, reader));
+ return new TokenStreamComponents(new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, reader));
}
}
Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java Wed Feb 6 00:58:30 2013
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.uima;
import org.apache.lucene.analysis.Analyzer;
import java.io.Reader;
+import java.util.Map;
/**
* {@link Analyzer} which uses the {@link UIMATypeAwareAnnotationsTokenizer} for the tokenization phase
@@ -28,15 +29,17 @@ public final class UIMATypeAwareAnalyzer
private final String descriptorPath;
private final String tokenType;
private final String featurePath;
+ private final Map<String, Object> configurationParameters;
- public UIMATypeAwareAnalyzer(String descriptorPath, String tokenType, String featurePath) {
+ public UIMATypeAwareAnalyzer(String descriptorPath, String tokenType, String featurePath, Map<String, Object> configurationParameters) {
this.descriptorPath = descriptorPath;
this.tokenType = tokenType;
this.featurePath = featurePath;
+ this.configurationParameters = configurationParameters;
}
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- return new TokenStreamComponents(new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, reader));
+ return new TokenStreamComponents(new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters, reader));
}
}
Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java Wed Feb 6 00:58:30 2013
@@ -29,6 +29,7 @@ import org.apache.uima.cas.text.Annotati
import java.io.IOException;
import java.io.Reader;
+import java.util.Map;
/**
* A {@link Tokenizer} which creates token from UIMA Annotations filling also their {@link TypeAttribute} according to
@@ -50,8 +51,8 @@ public final class UIMATypeAwareAnnotati
private int finalOffset = 0;
- public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Reader input) {
- super(input, descriptorPath);
+ public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters, Reader input) {
+ super(input, descriptorPath, configurationParameters);
this.tokenTypeString = tokenType;
this.termAttr = addAttribute(CharTermAttribute.class);
this.typeAttr = addAttribute(TypeAttribute.class);
Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java Wed Feb 6 00:58:30 2013
@@ -18,10 +18,10 @@ package org.apache.lucene.analysis.uima;
*/
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.uima.UIMATypeAwareAnnotationsTokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import java.io.Reader;
+import java.util.HashMap;
import java.util.Map;
/**
@@ -32,13 +32,23 @@ public class UIMATypeAwareAnnotationsTok
private String descriptorPath;
private String tokenType;
private String featurePath;
+ private Map<String, Object> configurationParameters;
@Override
public void init(Map<String, String> args) {
super.init(args);
- descriptorPath = args.get("descriptorPath");
- tokenType = args.get("tokenType");
- featurePath = args.get("featurePath");
+ configurationParameters = new HashMap<String, Object>();
+ for (String k : args.keySet()) {
+ if (k.equals("featurePath")) {
+ featurePath = args.get("featurePath");
+ } else if (k.equals("tokenType")) {
+ tokenType = args.get("tokenType");
+ } else if (k.equals("descriptorPath")) {
+ descriptorPath = args.get("descriptorPath");
+ } else {
+ configurationParameters.put(k, args.get(k));
+ }
+ }
if (descriptorPath == null || tokenType == null || featurePath == null) {
throw new IllegalArgumentException("descriptorPath, tokenType, and featurePath are mandatory");
}
@@ -46,6 +56,6 @@ public class UIMATypeAwareAnnotationsTok
@Override
public Tokenizer create(Reader input) {
- return new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, input);
+ return new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters, input);
}
}
Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml Wed Feb 6 00:58:30 2013
@@ -20,7 +20,7 @@
<primitive>true</primitive>
<annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleEntityAnnotator</annotatorImplementationName>
<analysisEngineMetaData>
- <name>DummyPoSTagger</name>
+ <name>EntityAnnotator</name>
<description/>
<version>1.0</version>
<vendor>ASF</vendor>
Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml Wed Feb 6 00:58:30 2013
@@ -20,9 +20,28 @@
<primitive>true</primitive>
<annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleWSTokenizerAnnotator</annotatorImplementationName>
<analysisEngineMetaData>
- <name>DummyPoSTagger</name>
+ <name>WSTokenizer</name>
<version>1.0</version>
<vendor>ASF</vendor>
+ <configurationParameters>
+ <configurationParameter>
+ <name>line-end</name>
+ <description>
+ the string used as line end
+ </description>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+ <configurationParameterSettings>
+ <nameValuePair>
+ <name>line-end</name>
+ <value>
+ <string>\n</string>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
<typeSystemDescription>
<types>
<typeDescription>
Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java Wed Feb 6 00:58:30 2013
@@ -36,6 +36,8 @@ import org.junit.Before;
import org.junit.Test;
import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
/**
* Testcase for {@link UIMABaseAnalyzer}
@@ -48,7 +50,7 @@ public class UIMABaseAnalyzerTest extend
@Before
public void setUp() throws Exception {
super.setUp();
- analyzer = new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation");
+ analyzer = new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation", null);
}
@Override
@@ -120,7 +122,15 @@ public class UIMABaseAnalyzerTest extend
@Test
public void testRandomStrings() throws Exception {
- checkRandomData(random(), new UIMABaseAnalyzer("/uima/TestAggregateSentenceAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation"),
+ checkRandomData(random(), new UIMABaseAnalyzer("/uima/TestAggregateSentenceAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation", null),
+ 100 * RANDOM_MULTIPLIER);
+ }
+
+ @Test
+ public void testRandomStringsWithConfigurationParameters() throws Exception {
+ Map<String, Object> cp = new HashMap<String, Object>();
+ cp.put("line-end", "\r");
+ checkRandomData(random(), new UIMABaseAnalyzer("/uima/TestWSTokenizerAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation", cp),
100 * RANDOM_MULTIPLIER);
}
Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java Wed Feb 6 00:58:30 2013
@@ -37,7 +37,7 @@ public class UIMATypeAwareAnalyzerTest e
public void setUp() throws Exception {
super.setUp();
analyzer = new UIMATypeAwareAnalyzer("/uima/AggregateSentenceAE.xml",
- "org.apache.uima.TokenAnnotation", "posTag");
+ "org.apache.uima.TokenAnnotation", "posTag", null);
}
@Override
@@ -63,7 +63,7 @@ public class UIMATypeAwareAnalyzerTest e
@Test
public void testRandomStrings() throws Exception {
checkRandomData(random(), new UIMATypeAwareAnalyzer("/uima/TestAggregateSentenceAE.xml",
- "org.apache.lucene.uima.ts.TokenAnnotation", "pos"), 100 * RANDOM_MULTIPLIER);
+ "org.apache.lucene.uima.ts.TokenAnnotation", "pos", null), 100 * RANDOM_MULTIPLIER);
}
}
Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java Wed Feb 6 00:58:30 2013
@@ -17,11 +17,13 @@ package org.apache.lucene.analysis.uima.
* limitations under the License.
*/
+import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
/**
* Dummy implementation of a UIMA based whitespace tokenizer
@@ -30,15 +32,21 @@ public class SampleWSTokenizerAnnotator
private final static String TOKEN_TYPE = "org.apache.lucene.uima.ts.TokenAnnotation";
private final static String SENTENCE_TYPE = "org.apache.lucene.uima.ts.SentenceAnnotation";
- private static final String CR = "\n";
+ private String lineEnd;
private static final String WHITESPACE = " ";
@Override
+ public void initialize(UimaContext aContext) throws ResourceInitializationException {
+ super.initialize(aContext);
+ lineEnd = String.valueOf(aContext.getConfigParameterValue("line-end"));
+ }
+
+ @Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
Type sentenceType = jCas.getCas().getTypeSystem().getType(SENTENCE_TYPE);
Type tokenType = jCas.getCas().getTypeSystem().getType(TOKEN_TYPE);
int i = 0;
- for (String sentenceString : jCas.getDocumentText().split(CR)) {
+ for (String sentenceString : jCas.getDocumentText().split(lineEnd)) {
// add the sentence
AnnotationFS sentenceAnnotation = jCas.getCas().createAnnotation(sentenceType, i, sentenceString.length());
jCas.addFsToIndexes(sentenceAnnotation);
Modified: lucene/dev/branches/lucene4547/lucene/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/benchmark/build.xml?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/benchmark/build.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/benchmark/build.xml Wed Feb 6 00:58:30 2013
@@ -147,6 +147,7 @@
<pathelement path="${analyzers-common.jar}"/>
<pathelement path="${queryparser.jar}"/>
<pathelement path="${facet.jar}"/>
+ <pathelement path="${queries.jar}"/>
<fileset dir="${common.dir}/analysis/icu/lib"/>
<path refid="base.classpath"/>
<fileset dir="lib"/>
Modified: lucene/dev/branches/lucene4547/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/build.xml?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/build.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/build.xml Wed Feb 6 00:58:30 2013
@@ -157,28 +157,34 @@
<license-check-macro dir="${basedir}" licensedir="${common.dir}/licenses" />
</target>
- <target name="check-forbidden-apis" depends="compile-tools,compile-test,load-custom-tasks,-check-forbidden-jdk-apis,-check-forbidden-test-apis,-check-system-out" description="Check forbidden API calls in compiled class files"/>
+ <target name="check-forbidden-apis" depends="compile-tools,compile-test,install-forbidden-apis,-forbidden-apis-classpath,-check-forbidden-jdk-apis,-check-forbidden-test-apis,-check-system-out" description="Check forbidden API calls in compiled class files"/>
+
+ <!-- TODO: Make the forbidden API checks per module! -->
+ <target name="-forbidden-apis-classpath">
+ <path id="forbidden-apis.classpath">
+ <fileset dir="${basedir}" includes="**/lib/*.jar"/>
+ <dirset dir="${basedir}/build" includes="**/classes/*"/>
+ </path>
+ </target>
<target name="-check-forbidden-jdk-apis">
- <forbidden-apis>
- <apiFileSet dir="${custom-tasks.dir}/forbiddenApis">
- <include name="jdk.txt" />
- <include name="jdk-deprecated.txt" />
- <include name="executors.txt" />
- </apiFileSet>
+ <forbidden-apis internalRuntimeForbidden="true" classpathref="forbidden-apis.classpath">
+ <bundledSignatures name="jdk-unsafe-${javac.target}"/>
+ <bundledSignatures name="jdk-deprecated-${javac.target}"/>
+ <signaturesFileSet file="${common.dir}/tools/forbiddenApis/executors.txt"/>
<fileset dir="${basedir}/build" includes="**/*.class" />
</forbidden-apis>
</target>
<target name="-check-forbidden-test-apis">
- <forbidden-apis apiFile="${custom-tasks.dir}/forbiddenApis/tests.txt">
+ <forbidden-apis signaturesFile="${common.dir}/tools/forbiddenApis/tests.txt" classpathref="forbidden-apis.classpath">
<classpath refid="junit-path"/>
<fileset dir="${basedir}/build" includes="**/classes/test/**/*.class,test-framework/**/*.class" />
</forbidden-apis>
</target>
<target name="-check-system-out">
- <forbidden-apis apiFile="${custom-tasks.dir}/forbiddenApis/system-out.txt">
+ <forbidden-apis bundledSignatures="jdk-system-out" classpathref="forbidden-apis.classpath">
<fileset dir="${basedir}/build">
<include name="**/classes/java/**/*.class"/>
<!-- this is basically tests -->
Modified: lucene/dev/branches/lucene4547/lucene/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/common-build.xml?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/common-build.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/common-build.xml Wed Feb 6 00:58:30 2013
@@ -1905,6 +1905,14 @@ ${tests-output}/junit4-*.suites - pe
<property name="groovy.loaded" value="true"/>
</target>
+ <!-- Forbidden API Task -->
+ <target name="install-forbidden-apis" unless="forbidden-apis.loaded" depends="ivy-availability-check,ivy-configure">
+ <ivy:cachepath organisation="de.thetaphi" module="forbiddenapis" revision="1.0"
+ inline="true" conf="default" transitive="true" pathid="forbidden-apis.classpath"/>
+ <taskdef name="forbidden-apis" classname="de.thetaphi.forbiddenapis.AntTask" classpathref="forbidden-apis.classpath"/>
+ <property name="forbidden-apis.loaded" value="true"/>
+ </target>
+
<!-- PEGDOWN macro: Before using depend on the target "resolve-pegdown" -->
<target name="resolve-pegdown" unless="pegdown.loaded" depends="ivy-availability-check,ivy-configure">
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCache.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCache.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCache.java Wed Feb 6 00:58:30 2013
@@ -29,6 +29,8 @@ import org.apache.lucene.index.AtomicRea
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
@@ -90,20 +92,21 @@ public interface FieldCache {
}
/**
- * Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops
- * processing terms and returns the current FieldCache
- * array.
- * @lucene.internal
- */
- public static final class StopFillCacheException extends RuntimeException {
- }
-
- /**
* Marker interface as super-interface to all parsers. It
* is used to specify a custom parser to {@link
* SortField#SortField(String, FieldCache.Parser)}.
*/
public interface Parser {
+
+ /**
+ * Pulls a {@link TermsEnum} from the given {@link Terms}. This method allows certain parsers
+ * to filter the actual TermsEnum before the field cache is filled.
+ *
+ * @param terms the {@link Terms} instance to create the {@link TermsEnum} from.
+ * @return a possibly filtered {@link TermsEnum} instance, this method must not return <code>null</code>.
+ * @throws IOException if an {@link IOException} occurs
+ */
+ public TermsEnum termsEnum(Terms terms) throws IOException;
}
/** Interface to parse bytes from document fields.
@@ -171,6 +174,10 @@ public interface FieldCache {
public String toString() {
return FieldCache.class.getName()+".DEFAULT_BYTE_PARSER";
}
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return terms.iterator(null);
+ }
};
/** The default parser for short values, which are encoded by {@link Short#toString(short)} */
@@ -187,6 +194,11 @@ public interface FieldCache {
public String toString() {
return FieldCache.class.getName()+".DEFAULT_SHORT_PARSER";
}
+
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return terms.iterator(null);
+ }
};
/** The default parser for int values, which are encoded by {@link Integer#toString(int)} */
@@ -199,6 +211,12 @@ public interface FieldCache {
// directly from byte[]
return Integer.parseInt(term.utf8ToString());
}
+
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return terms.iterator(null);
+ }
+
@Override
public String toString() {
return FieldCache.class.getName()+".DEFAULT_INT_PARSER";
@@ -215,6 +233,12 @@ public interface FieldCache {
// directly from byte[]
return Float.parseFloat(term.utf8ToString());
}
+
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return terms.iterator(null);
+ }
+
@Override
public String toString() {
return FieldCache.class.getName()+".DEFAULT_FLOAT_PARSER";
@@ -231,6 +255,12 @@ public interface FieldCache {
// directly from byte[]
return Long.parseLong(term.utf8ToString());
}
+
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return terms.iterator(null);
+ }
+
@Override
public String toString() {
return FieldCache.class.getName()+".DEFAULT_LONG_PARSER";
@@ -247,6 +277,12 @@ public interface FieldCache {
// directly from byte[]
return Double.parseDouble(term.utf8ToString());
}
+
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return terms.iterator(null);
+ }
+
@Override
public String toString() {
return FieldCache.class.getName()+".DEFAULT_DOUBLE_PARSER";
@@ -260,10 +296,14 @@ public interface FieldCache {
public static final IntParser NUMERIC_UTILS_INT_PARSER=new IntParser(){
@Override
public int parseInt(BytesRef term) {
- if (NumericUtils.getPrefixCodedIntShift(term) > 0)
- throw new StopFillCacheException();
return NumericUtils.prefixCodedToInt(term);
}
+
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return NumericUtils.filterPrefixCodedInts(terms.iterator(null));
+ }
+
@Override
public String toString() {
return FieldCache.class.getName()+".NUMERIC_UTILS_INT_PARSER";
@@ -277,14 +317,17 @@ public interface FieldCache {
public static final FloatParser NUMERIC_UTILS_FLOAT_PARSER=new FloatParser(){
@Override
public float parseFloat(BytesRef term) {
- if (NumericUtils.getPrefixCodedIntShift(term) > 0)
- throw new StopFillCacheException();
return NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(term));
}
@Override
public String toString() {
return FieldCache.class.getName()+".NUMERIC_UTILS_FLOAT_PARSER";
}
+
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return NumericUtils.filterPrefixCodedInts(terms.iterator(null));
+ }
};
/**
@@ -294,14 +337,17 @@ public interface FieldCache {
public static final LongParser NUMERIC_UTILS_LONG_PARSER = new LongParser(){
@Override
public long parseLong(BytesRef term) {
- if (NumericUtils.getPrefixCodedLongShift(term) > 0)
- throw new StopFillCacheException();
return NumericUtils.prefixCodedToLong(term);
}
@Override
public String toString() {
return FieldCache.class.getName()+".NUMERIC_UTILS_LONG_PARSER";
}
+
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return NumericUtils.filterPrefixCodedLongs(terms.iterator(null));
+ }
};
/**
@@ -311,14 +357,17 @@ public interface FieldCache {
public static final DoubleParser NUMERIC_UTILS_DOUBLE_PARSER = new DoubleParser(){
@Override
public double parseDouble(BytesRef term) {
- if (NumericUtils.getPrefixCodedLongShift(term) > 0)
- throw new StopFillCacheException();
return NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(term));
}
@Override
public String toString() {
return FieldCache.class.getName()+".NUMERIC_UTILS_DOUBLE_PARSER";
}
+
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return NumericUtils.filterPrefixCodedLongs(terms.iterator(null));
+ }
};
@@ -634,7 +683,7 @@ public interface FieldCache {
return b.toString();
}
}
-
+
/**
* EXPERT: Generates an array of CacheEntry objects representing all items
* currently in the FieldCache.
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java Wed Feb 6 00:58:30 2013
@@ -298,7 +298,7 @@ class FieldCacheImpl implements FieldCac
}
}
- final TermsEnum termsEnum = terms.iterator(null);
+ final TermsEnum termsEnum = termsEnum(terms);
DocsEnum docs = null;
FixedBitSet docsWithField = null;
@@ -307,11 +307,7 @@ class FieldCacheImpl implements FieldCac
if (term == null) {
break;
}
- try {
- visitTerm(term);
- } catch (StopFillCacheException stop) {
- break;
- }
+ visitTerm(term);
docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
while (true) {
final int docID = docs.nextDoc();
@@ -331,6 +327,7 @@ class FieldCacheImpl implements FieldCac
}
}
+ protected abstract TermsEnum termsEnum(Terms terms) throws IOException;
protected abstract void visitTerm(BytesRef term);
protected abstract void visitDoc(int docID);
}
@@ -425,6 +422,11 @@ class FieldCacheImpl implements FieldCac
public void visitDoc(int docID) {
values[docID] = currentValue;
}
+
+ @Override
+ protected TermsEnum termsEnum(Terms terms) throws IOException {
+ return parser.termsEnum(terms);
+ }
};
u.uninvert(reader, key.field, setDocsWithField);
@@ -505,6 +507,11 @@ class FieldCacheImpl implements FieldCac
public void visitDoc(int docID) {
values[docID] = currentValue;
}
+
+ @Override
+ protected TermsEnum termsEnum(Terms terms) throws IOException {
+ return parser.termsEnum(terms);
+ }
};
u.uninvert(reader, key.field, setDocsWithField);
@@ -610,6 +617,11 @@ class FieldCacheImpl implements FieldCac
public void visitDoc(int docID) {
values[docID] = currentValue;
}
+
+ @Override
+ protected TermsEnum termsEnum(Terms terms) throws IOException {
+ return parser.termsEnum(terms);
+ }
};
u.uninvert(reader, key.field, setDocsWithField);
@@ -779,6 +791,11 @@ class FieldCacheImpl implements FieldCac
public void visitDoc(int docID) {
values[docID] = currentValue;
}
+
+ @Override
+ protected TermsEnum termsEnum(Terms terms) throws IOException {
+ return parser.termsEnum(terms);
+ }
};
u.uninvert(reader, key.field, setDocsWithField);
@@ -877,6 +894,11 @@ class FieldCacheImpl implements FieldCac
public void visitDoc(int docID) {
values[docID] = currentValue;
}
+
+ @Override
+ protected TermsEnum termsEnum(Terms terms) throws IOException {
+ return parser.termsEnum(terms);
+ }
};
u.uninvert(reader, key.field, setDocsWithField);
@@ -975,6 +997,11 @@ class FieldCacheImpl implements FieldCac
public void visitDoc(int docID) {
values[docID] = currentValue;
}
+
+ @Override
+ protected TermsEnum termsEnum(Terms terms) throws IOException {
+ return parser.termsEnum(terms);
+ }
};
u.uninvert(reader, key.field, setDocsWithField);
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java Wed Feb 6 00:58:30 2013
@@ -22,6 +22,8 @@ import org.apache.lucene.document.Double
import org.apache.lucene.document.FloatField; // javadocs
import org.apache.lucene.document.IntField; // javadocs
import org.apache.lucene.document.LongField; // javadocs
+import org.apache.lucene.index.FilteredTermsEnum;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery; // for javadocs
@@ -456,4 +458,41 @@ public final class NumericUtils {
}
+ /**
+ * Filters the given {@link TermsEnum} by accepting only prefix coded 64 bit
+ * terms with a shift value of <tt>0</tt>.
+ *
+ * @param termsEnum
+ * the terms enum to filter
+ * @return a filtered {@link TermsEnum} that only returns prefix coded 64 bit
+ * terms with a shift value of <tt>0</tt>.
+ */
+ public static TermsEnum filterPrefixCodedLongs(TermsEnum termsEnum) {
+ return new FilteredTermsEnum(termsEnum, false) {
+ @Override
+ protected AcceptStatus accept(BytesRef term) {
+ return NumericUtils.getPrefixCodedLongShift(term) == 0 ? AcceptStatus.YES : AcceptStatus.END;
+ }
+ };
+ }
+
+ /**
+ * Filters the given {@link TermsEnum} by accepting only prefix coded 32 bit
+ * terms with a shift value of <tt>0</tt>.
+ *
+ * @param termsEnum
+ * the terms enum to filter
+ * @return a filtered {@link TermsEnum} that only returns prefix coded 32 bit
+ * terms with a shift value of <tt>0</tt>.
+ */
+ public static TermsEnum filterPrefixCodedInts(TermsEnum termsEnum) {
+ return new FilteredTermsEnum(termsEnum, false) {
+
+ @Override
+ protected AcceptStatus accept(BytesRef term) {
+ return NumericUtils.getPrefixCodedIntShift(term) == 0 ? AcceptStatus.YES : AcceptStatus.END;
+ }
+ };
+ }
+
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java Wed Feb 6 00:58:30 2013
@@ -20,6 +20,8 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -94,6 +96,11 @@ final class JustCompileSearch {
public long parseLong(BytesRef string) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
+
+ @Override
+ public TermsEnum termsEnum(Terms terms) {
+ throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+ }
}
@@ -103,6 +110,11 @@ final class JustCompileSearch {
public double parseDouble(BytesRef term) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
+
+ @Override
+ public TermsEnum termsEnum(Terms terms) {
+ throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+ }
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/TestSort.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/TestSort.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/TestSort.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/TestSort.java Wed Feb 6 00:58:30 2013
@@ -51,6 +51,8 @@ import org.apache.lucene.index.RandomInd
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.FieldValueHitQueue.Entry;
import org.apache.lucene.store.Directory;
@@ -625,8 +627,13 @@ public class TestSort extends LuceneTest
public final int parseInt(final BytesRef term) {
return (term.bytes[term.offset]-'A') * 123456;
}
- }), SortField.FIELD_DOC);
- assertMatches(full, queryA, sort, "JIHGFEDCBA");
+
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return terms.iterator(null);
+ }
+ }), SortField.FIELD_DOC );
+ assertMatches (full, queryA, sort, "JIHGFEDCBA");
assertSaneFieldCaches(getTestName() + " IntParser");
fc.purgeAllCaches();
@@ -635,8 +642,12 @@ public class TestSort extends LuceneTest
public final float parseFloat(final BytesRef term) {
return (float) Math.sqrt( term.bytes[term.offset]);
}
- }), SortField.FIELD_DOC);
- assertMatches(full, queryA, sort, "JIHGFEDCBA");
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return terms.iterator(null);
+ }
+ }), SortField.FIELD_DOC );
+ assertMatches (full, queryA, sort, "JIHGFEDCBA");
assertSaneFieldCaches(getTestName() + " FloatParser");
fc.purgeAllCaches();
@@ -645,8 +656,13 @@ public class TestSort extends LuceneTest
public final long parseLong(final BytesRef term) {
return (term.bytes[term.offset]-'A') * 1234567890L;
}
- }), SortField.FIELD_DOC);
- assertMatches(full, queryA, sort, "JIHGFEDCBA");
+
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return terms.iterator(null);
+ }
+ }), SortField.FIELD_DOC );
+ assertMatches (full, queryA, sort, "JIHGFEDCBA");
assertSaneFieldCaches(getTestName() + " LongParser");
fc.purgeAllCaches();
@@ -655,8 +671,12 @@ public class TestSort extends LuceneTest
public final double parseDouble(final BytesRef term) {
return Math.pow( term.bytes[term.offset], (term.bytes[term.offset]-'A'));
}
- }), SortField.FIELD_DOC);
- assertMatches(full, queryA, sort, "JIHGFEDCBA");
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return terms.iterator(null);
+ }
+ }), SortField.FIELD_DOC );
+ assertMatches (full, queryA, sort, "JIHGFEDCBA");
assertSaneFieldCaches(getTestName() + " DoubleParser");
fc.purgeAllCaches();
@@ -665,8 +685,13 @@ public class TestSort extends LuceneTest
public final byte parseByte(final BytesRef term) {
return (byte) (term.bytes[term.offset]-'A');
}
- }), SortField.FIELD_DOC);
- assertMatches(full, queryA, sort, "JIHGFEDCBA");
+
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return terms.iterator(null);
+ }
+ }), SortField.FIELD_DOC );
+ assertMatches (full, queryA, sort, "JIHGFEDCBA");
assertSaneFieldCaches(getTestName() + " ByteParser");
fc.purgeAllCaches();
@@ -675,8 +700,12 @@ public class TestSort extends LuceneTest
public final short parseShort(final BytesRef term) {
return (short) (term.bytes[term.offset]-'A');
}
- }), SortField.FIELD_DOC);
- assertMatches(full, queryA, sort, "JIHGFEDCBA");
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return terms.iterator(null);
+ }
+ }), SortField.FIELD_DOC );
+ assertMatches (full, queryA, sort, "JIHGFEDCBA");
assertSaneFieldCaches(getTestName() + " ShortParser");
fc.purgeAllCaches();
}
@@ -752,6 +781,11 @@ public class TestSort extends LuceneTest
public final int parseInt(final BytesRef term) {
return (term.bytes[term.offset]-'A') * 123456;
}
+
+ @Override
+ public TermsEnum termsEnum(Terms terms) throws IOException {
+ return terms.iterator(null);
+ }
};
@Override
Modified: lucene/dev/branches/lucene4547/lucene/highlighter/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/highlighter/build.xml?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/highlighter/build.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/highlighter/build.xml Wed Feb 6 00:58:30 2013
@@ -27,6 +27,7 @@
<path id="classpath">
<pathelement path="${memory.jar}"/>
+ <pathelement path="${queries.jar}"/>
<path refid="base.classpath"/>
</path>
Modified: lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java Wed Feb 6 00:58:30 2013
@@ -187,11 +187,9 @@ public class Highlighter
ArrayList<TextFragment> docFrags = new ArrayList<TextFragment>();
StringBuilder newText=new StringBuilder();
- CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
- OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
- tokenStream.addAttribute(PositionIncrementAttribute.class);
- tokenStream.reset();
-
+ CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
+ OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
+ tokenStream.reset();
TextFragment currentFrag = new TextFragment(newText,newText.length(), docFrags.size());
if (fragmentScorer instanceof QueryScorer) {
Modified: lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Wed Feb 6 00:58:30 2013
@@ -18,7 +18,7 @@ package org.apache.lucene.search.highlig
*/
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Collection;
+import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -29,11 +29,20 @@ import java.util.TreeSet;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterAtomicReader;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.Terms;
import org.apache.lucene.index.memory.MemoryIndex;
+import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanFirstQuery;
@@ -44,6 +53,8 @@ import org.apache.lucene.search.spans.Sp
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.IOUtils;
+
/**
* Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether
@@ -53,12 +64,13 @@ public class WeightedSpanTermExtractor {
private String fieldName;
private TokenStream tokenStream;
- private Map<String,AtomicReaderContext> readers = new HashMap<String,AtomicReaderContext>(10);
private String defaultField;
private boolean expandMultiTermQuery;
private boolean cachedTokenStream;
private boolean wrapToCaching = true;
private int maxDocCharsToAnalyze;
+ private AtomicReader reader = null;
+
public WeightedSpanTermExtractor() {
}
@@ -69,18 +81,6 @@ public class WeightedSpanTermExtractor {
}
}
- private void closeReaders() {
- Collection<AtomicReaderContext> ctxSet = readers.values();
-
- for (final AtomicReaderContext ctx : ctxSet) {
- try {
- ctx.reader().close();
- } catch (IOException e) {
- // alert?
- }
- }
- }
-
/**
* Fills a <code>Map</code> with <@link WeightedSpanTerm>s using the terms from the supplied <code>Query</code>.
*
@@ -146,21 +146,14 @@ public class WeightedSpanTermExtractor {
if (q != null) {
extract(q, terms);
}
+ } else if (query instanceof CommonTermsQuery) {
+ // specialized since rewriting would change the result query
+ // this query is TermContext sensitive.
+ extractWeightedTerms(terms, query);
} else if (query instanceof DisjunctionMaxQuery) {
for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
extract(iterator.next(), terms);
}
- } else if (query instanceof MultiTermQuery && expandMultiTermQuery) {
- MultiTermQuery mtq = ((MultiTermQuery)query);
- if(mtq.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
- mtq = (MultiTermQuery) mtq.clone();
- mtq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
- query = mtq;
- }
- if (mtq.getField() != null) {
- IndexReader ir = getLeafContextForField(mtq.getField()).reader();
- extract(query.rewrite(ir), terms);
- }
} else if (query instanceof MultiPhraseQuery) {
final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
final List<Term[]> termArrays = mpq.getTermArrays();
@@ -210,12 +203,30 @@ public class WeightedSpanTermExtractor {
sp.setBoost(query.getBoost());
extractWeightedSpanTerms(terms, sp);
}
+ } else {
+ Query origQuery = query;
+ if (query instanceof MultiTermQuery) {
+ if (!expandMultiTermQuery) {
+ return;
+ }
+ MultiTermQuery copy = (MultiTermQuery) query.clone();
+ copy.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+ origQuery = copy;
+ }
+ final IndexReader reader = getLeafContext().reader();
+ Query rewritten = origQuery.rewrite(reader);
+ if (rewritten != origQuery) {
+ // only rewrite once and then flatten again - the rewritten query could have a special treatment
+ // if this method is overridden in a subclass or above in the next recursion
+ extract(rewritten, terms);
+ }
}
extractUnknownQuery(query, terms);
}
protected void extractUnknownQuery(Query query,
Map<String, WeightedSpanTerm> terms) throws IOException {
+
// for sub-classing to extract custom queries
}
@@ -249,7 +260,7 @@ public class WeightedSpanTermExtractor {
final boolean mustRewriteQuery = mustRewriteQuery(spanQuery);
if (mustRewriteQuery) {
for (final String field : fieldNames) {
- final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContextForField(field).reader());
+ final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContext().reader());
queries.put(field, rewrittenQuery);
rewrittenQuery.extractTerms(nonWeightedTerms);
}
@@ -266,7 +277,7 @@ public class WeightedSpanTermExtractor {
} else {
q = spanQuery;
}
- AtomicReaderContext context = getLeafContextForField(field);
+ AtomicReaderContext context = getLeafContext();
Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
TreeSet<Term> extractedTerms = new TreeSet<Term>();
q.extractTerms(extractedTerms);
@@ -338,23 +349,79 @@ public class WeightedSpanTermExtractor {
return rv;
}
- protected AtomicReaderContext getLeafContextForField(String field) throws IOException {
- if(wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) {
- tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
- cachedTokenStream = true;
- }
- AtomicReaderContext context = readers.get(field);
- if (context == null) {
- MemoryIndex indexer = new MemoryIndex();
- indexer.addField(field, new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
+ protected AtomicReaderContext getLeafContext() throws IOException {
+ if (reader == null) {
+ if(wrapToCaching && !(tokenStream instanceof CachingTokenFilter)) {
+ assert !cachedTokenStream;
+ tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
+ cachedTokenStream = true;
+ }
+ final MemoryIndex indexer = new MemoryIndex(true);
+ indexer.addField(DelegatingAtomicReader.FIELD_NAME, tokenStream);
tokenStream.reset();
- IndexSearcher searcher = indexer.createSearcher();
+ final IndexSearcher searcher = indexer.createSearcher();
// MEM index has only atomic ctx
- context = (AtomicReaderContext) searcher.getTopReaderContext();
- readers.put(field, context);
+ reader = new DelegatingAtomicReader(((AtomicReaderContext)searcher.getTopReaderContext()).reader());
+ }
+ return reader.getContext();
+ }
+
+ /*
+ * This reader will just delegate every call to a single field in the wrapped
+ * AtomicReader. This way we only need to build this field once rather than
+ * N-Times
+ */
+ static final class DelegatingAtomicReader extends FilterAtomicReader {
+ private static final String FIELD_NAME = "shadowed_field";
+
+ DelegatingAtomicReader(AtomicReader in) {
+ super(in);
+ }
+
+ @Override
+ public FieldInfos getFieldInfos() {
+ throw new UnsupportedOperationException();
}
- return context;
+ @Override
+ public Fields fields() throws IOException {
+ return new FilterFields(super.fields()) {
+ @Override
+ public Terms terms(String field) throws IOException {
+ return super.terms(DelegatingAtomicReader.FIELD_NAME);
+ }
+
+ @Override
+ public Iterator<String> iterator() {
+ return Collections.singletonList(DelegatingAtomicReader.FIELD_NAME).iterator();
+ }
+
+ @Override
+ public int size() {
+ return 1;
+ }
+ };
+ }
+
+ @Override
+ public NumericDocValues getNumericDocValues(String field) throws IOException {
+ return super.getNumericDocValues(FIELD_NAME);
+ }
+
+ @Override
+ public BinaryDocValues getBinaryDocValues(String field) throws IOException {
+ return super.getBinaryDocValues(FIELD_NAME);
+ }
+
+ @Override
+ public SortedDocValues getSortedDocValues(String field) throws IOException {
+ return super.getSortedDocValues(FIELD_NAME);
+ }
+
+ @Override
+ public NumericDocValues getNormValues(String field) throws IOException {
+ return super.getNormValues(FIELD_NAME);
+ }
}
/**
@@ -401,7 +468,7 @@ public class WeightedSpanTermExtractor {
try {
extract(query, terms);
} finally {
- closeReaders();
+ IOUtils.close(reader);
}
return terms;
@@ -449,8 +516,7 @@ public class WeightedSpanTermExtractor {
weightedSpanTerm.weight *= idf;
}
} finally {
-
- closeReaders();
+ IOUtils.close(reader);
}
return terms;
Modified: lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (original)
+++ lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java Wed Feb 6 00:58:30 2013
@@ -28,9 +28,12 @@ import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
@@ -92,8 +95,7 @@ public class FieldQuery {
if( !clause.isProhibited() )
flatten( clause.getQuery(), reader, flatQueries );
}
- }
- else if( sourceQuery instanceof DisjunctionMaxQuery ){
+ } else if( sourceQuery instanceof DisjunctionMaxQuery ){
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
for( Query query : dmq ){
flatten( query, reader, flatQueries );
@@ -103,12 +105,6 @@ public class FieldQuery {
if( !flatQueries.contains( sourceQuery ) )
flatQueries.add( sourceQuery );
}
- else if (sourceQuery instanceof MultiTermQuery && reader != null) {
- MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
- copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
- BooleanQuery mtqTerms = (BooleanQuery) copy.rewrite(reader);
- flatten(mtqTerms, reader, flatQueries);
- }
else if( sourceQuery instanceof PhraseQuery ){
if( !flatQueries.contains( sourceQuery ) ){
PhraseQuery pq = (PhraseQuery)sourceQuery;
@@ -118,6 +114,31 @@ public class FieldQuery {
flatQueries.add( new TermQuery( pq.getTerms()[0] ) );
}
}
+ } else if (sourceQuery instanceof ConstantScoreQuery) {
+ final Query q = ((ConstantScoreQuery) sourceQuery).getQuery();
+ if (q != null) {
+ flatten(q, reader, flatQueries);
+ }
+ } else if (sourceQuery instanceof FilteredQuery) {
+ final Query q = ((FilteredQuery) sourceQuery).getQuery();
+ if (q != null) {
+ flatten(q, reader, flatQueries);
+ }
+ } else if (reader != null){
+ Query query = sourceQuery;
+ if (sourceQuery instanceof MultiTermQuery) {
+ MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
+ copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
+ query = copy;
+ }
+ Query rewritten = query.rewrite(reader);
+ if (rewritten != query) {
+ // only rewrite once and then flatten again - the rewritten query could have a special treatment
+ // if this method is overridden in a subclass.
+ flatten(rewritten, reader, flatQueries);
+
+ }
+ // if the query is already rewritten we discard it
}
// else discard queries
}
Modified: lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Wed Feb 6 00:58:30 2013
@@ -46,6 +46,7 @@ import org.apache.lucene.index.StoredDoc
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
@@ -114,6 +115,87 @@ public class HighlighterTest extends Bas
}
}
+ public void testHighlightingCommonTermsQuery() throws Exception {
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
+ CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3);
+ query.add(new Term(FIELD_NAME, "this"));
+ query.add(new Term(FIELD_NAME, "long"));
+ query.add(new Term(FIELD_NAME, "very"));
+
+ searcher = new IndexSearcher(reader);
+ TopDocs hits = searcher.search(query, 10);
+ assertEquals(2, hits.totalHits);
+ QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+ Highlighter highlighter = new Highlighter(scorer);
+
+ StoredDocument doc = searcher.doc(hits.scoreDocs[0].doc);
+ String storedField = doc.get(FIELD_NAME);
+
+ TokenStream stream = TokenSources.getAnyTokenStream(searcher
+ .getIndexReader(), hits.scoreDocs[0].doc, FIELD_NAME, doc, analyzer);
+ Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
+ highlighter.setTextFragmenter(fragmenter);
+ String fragment = highlighter.getBestFragment(stream, storedField);
+ assertEquals("Hello <B>this</B> is a piece of text that is <B>very</B> <B>long</B> and contains too much preamble and the meat is really here which says kennedy has been shot", fragment);
+
+ doc = searcher.doc(hits.scoreDocs[1].doc);
+ storedField = doc.get(FIELD_NAME);
+
+ stream = TokenSources.getAnyTokenStream(searcher
+ .getIndexReader(), hits.scoreDocs[1].doc, FIELD_NAME, doc, analyzer);
+ highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
+ fragment = highlighter.getBestFragment(stream, storedField);
+ assertEquals("<B>This</B> piece of text refers to Kennedy at the beginning then has a longer piece of text that is <B>very</B>", fragment);
+ }
+
+ public void testHighlightUnknowQueryAfterRewrite() throws IOException, InvalidTokenOffsetsException {
+ Query query = new Query() {
+
+ @Override
+ public Query rewrite(IndexReader reader) throws IOException {
+ CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3);
+ query.add(new Term(FIELD_NAME, "this"));
+ query.add(new Term(FIELD_NAME, "long"));
+ query.add(new Term(FIELD_NAME, "very"));
+ return query;
+ }
+
+ @Override
+ public String toString(String field) {
+ return null;
+ }
+
+ };
+
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
+
+ searcher = new IndexSearcher(reader);
+ TopDocs hits = searcher.search(query, 10);
+ assertEquals(2, hits.totalHits);
+ QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+ Highlighter highlighter = new Highlighter(scorer);
+
+ StoredDocument doc = searcher.doc(hits.scoreDocs[0].doc);
+ String storedField = doc.get(FIELD_NAME);
+
+ TokenStream stream = TokenSources.getAnyTokenStream(searcher
+ .getIndexReader(), hits.scoreDocs[0].doc, FIELD_NAME, doc, analyzer);
+ Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
+ highlighter.setTextFragmenter(fragmenter);
+ String fragment = highlighter.getBestFragment(stream, storedField);
+ assertEquals("Hello <B>this</B> is a piece of text that is <B>very</B> <B>long</B> and contains too much preamble and the meat is really here which says kennedy has been shot", fragment);
+
+ doc = searcher.doc(hits.scoreDocs[1].doc);
+ storedField = doc.get(FIELD_NAME);
+
+ stream = TokenSources.getAnyTokenStream(searcher
+ .getIndexReader(), hits.scoreDocs[1].doc, FIELD_NAME, doc, analyzer);
+ highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
+ fragment = highlighter.getBestFragment(stream, storedField);
+ assertEquals("<B>This</B> piece of text refers to Kennedy at the beginning then has a longer piece of text that is <B>very</B>", fragment);
+
+ }
+
public void testHighlightingWithDefaultField() throws Exception {
String s1 = "I call our world Flatland, not because we call it so,";
@@ -150,7 +232,7 @@ public class HighlighterTest extends Bas
"Query in a named field does not result in highlighting when that field isn't in the query",
s1, highlightField(q, FIELD_NAME, s1));
}
-
+
/**
* This method intended for use with <tt>testHighlightingWithDefaultField()</tt>
*/
@@ -603,7 +685,7 @@ public class HighlighterTest extends Bas
// Not sure we can assert anything here - just running to check we dont
// throw any exceptions
}
-
+
public void testSpanHighlighting() throws Exception {
Query query1 = new SpanNearQuery(new SpanQuery[] {
new SpanTermQuery(new Term(FIELD_NAME, "wordx")),
Modified: lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java Wed Feb 6 00:58:30 2013
@@ -18,6 +18,8 @@ package org.apache.lucene.search.vectorh
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -26,7 +28,13 @@ import org.apache.lucene.index.Directory
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
+import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
+import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -62,4 +70,47 @@ public class FastVectorHighlighterTest e
writer.close();
dir.close();
}
+
+ public void testCommonTermsQueryHighlightTest() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
+ FieldType type = new FieldType(TextField.TYPE_STORED);
+ type.setStoreTermVectorOffsets(true);
+ type.setStoreTermVectorPositions(true);
+ type.setStoreTermVectors(true);
+ type.freeze();
+ String[] texts = {
+ "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot",
+ "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
+ "JFK has been shot", "John Kennedy has been shot",
+ "This text has a typo in referring to Keneddy",
+ "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets is a the lets is a the lets is a the lets" };
+ for (int i = 0; i < texts.length; i++) {
+ Document doc = new Document();
+ Field field = new Field("field", texts[i], type);
+ doc.add(field);
+ writer.addDocument(doc);
+ }
+ CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 2);
+ query.add(new Term("field", "text"));
+ query.add(new Term("field", "long"));
+ query.add(new Term("field", "very"));
+
+ FastVectorHighlighter highlighter = new FastVectorHighlighter();
+ IndexReader reader = DirectoryReader.open(writer, true);
+ IndexSearcher searcher = new IndexSearcher(reader);
+ TopDocs hits = searcher.search(query, 10);
+ assertEquals(2, hits.totalHits);
+ FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+ String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1);
+ assertEquals("This piece of <b>text</b> refers to Kennedy at the beginning then has a longer piece of <b>text</b> that is <b>very</b> <b>long</b> in the middle and finally ends with another reference to Kennedy", bestFragments[0]);
+
+ fieldQuery = highlighter.getFieldQuery(query, reader);
+ bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[1].doc, "field", 1000, 1);
+ assertEquals("Hello this is a piece of <b>text</b> that is <b>very</b> <b>long</b> and contains too much preamble and the meat is really here which says kennedy has been shot", bestFragments[0]);
+
+ reader.close();
+ writer.close();
+ dir.close();
+ }
}
Modified: lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java Wed Feb 6 00:58:30 2013
@@ -23,8 +23,13 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
@@ -35,6 +40,7 @@ import org.apache.lucene.search.TermRang
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
public class FieldQueryTest extends AbstractTestCase {
@@ -905,4 +911,40 @@ public class FieldQueryTest extends Abst
assertNotNull (fq.searchPhrase(F, phraseCandidate));
}
+ public void testStopRewrite() throws Exception {
+ Query q = new Query() {
+
+ @Override
+ public String toString(String field) {
+ return "DummyQuery";
+ }
+
+ };
+ make1d1fIndex( "a" );
+ assertNotNull(reader);
+ new FieldQuery(q, reader, true, true );
+ }
+
+ public void testFlattenFilteredQuery() throws Exception {
+ Query query = new FilteredQuery(pqF( "A" ), new Filter() {
+ @Override
+ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
+ throws IOException {
+ return null;
+ }
+ });
+ FieldQuery fq = new FieldQuery( query, true, true );
+ Set<Query> flatQueries = new HashSet<Query>();
+ fq.flatten( query, reader, flatQueries );
+ assertCollectionQueries( flatQueries, tq( "A" ) );
+ }
+
+ public void testFlattenConstantScoreQuery() throws Exception {
+ Query query = new ConstantScoreQuery(pqF( "A" ));
+ FieldQuery fq = new FieldQuery( query, true, true );
+ Set<Query> flatQueries = new HashSet<Query>();
+ fq.flatten( query, reader, flatQueries );
+ assertCollectionQueries( flatQueries, tq( "A" ) );
+ }
+
}
Modified: lucene/dev/branches/lucene4547/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/lucene4547/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Wed Feb 6 00:58:30 2013
@@ -465,7 +465,9 @@ public class MemoryIndex {
throw new RuntimeException(e);
} finally {
try {
- if (stream != null) stream.close();
+ if (stream != null) {
+ stream.close();
+ }
} catch (IOException e2) {
throw new RuntimeException(e2);
}
Modified: lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj (original)
+++ lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj Wed Feb 6 00:58:30 2013
@@ -162,7 +162,7 @@ PARSER_END(QueryParser)
| <CARAT: "^" > : Boost
| <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
-| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
+| <FUZZY_SLOP: "~" ((<_NUM_CHAR>)+ (( "." (<_NUM_CHAR>)+ )? (<_TERM_CHAR>)*) | (<_TERM_CHAR>)*) >
| <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >