You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/11/17 09:47:36 UTC
svn commit: r1640099 [1/4] - in /lucene/dev/branches/lucene6005: ./
dev-tools/ dev-tools/scripts/ lucene/ lucene/analysis/
lucene/analysis/common/
lucene/analysis/common/src/java/org/apache/lucene/analysis/sr/
lucene/analysis/common/src/resources/META-...
Author: mikemccand
Date: Mon Nov 17 08:47:34 2014
New Revision: 1640099
URL: http://svn.apache.org/r1640099
Log:
LUCENE-6005: merge trunk
Added:
lucene/dev/branches/lucene6005/lucene/analysis/common/src/java/org/apache/lucene/analysis/sr/
- copied from r1640053, lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/sr/
lucene/dev/branches/lucene6005/lucene/analysis/common/src/test/org/apache/lucene/analysis/sr/
- copied from r1640053, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sr/
lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DaitchMokotoffSoundexFilter.java
- copied unchanged from r1640053, lucene/dev/trunk/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DaitchMokotoffSoundexFilter.java
lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DaitchMokotoffSoundexFilterFactory.java
- copied unchanged from r1640053, lucene/dev/trunk/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DaitchMokotoffSoundexFilterFactory.java
lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDaitchMokotoffSoundexFilter.java
- copied unchanged from r1640053, lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDaitchMokotoffSoundexFilter.java
lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDaitchMokotoffSoundexFilterFactory.java
- copied unchanged from r1640053, lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDaitchMokotoffSoundexFilterFactory.java
lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/CachingNaiveBayesClassifierTest.java
- copied unchanged from r1640053, lucene/dev/trunk/lucene/classification/src/test/org/apache/lucene/classification/CachingNaiveBayesClassifierTest.java
lucene/dev/branches/lucene6005/lucene/licenses/commons-codec-1.10.jar.sha1
- copied unchanged from r1640053, lucene/dev/trunk/lucene/licenses/commons-codec-1.10.jar.sha1
lucene/dev/branches/lucene6005/solr/licenses/commons-codec-1.10.jar.sha1
- copied unchanged from r1640053, lucene/dev/trunk/solr/licenses/commons-codec-1.10.jar.sha1
Removed:
lucene/dev/branches/lucene6005/lucene/licenses/commons-codec-1.9.jar.sha1
lucene/dev/branches/lucene6005/solr/licenses/commons-codec-1.9.jar.sha1
Modified:
lucene/dev/branches/lucene6005/ (props changed)
lucene/dev/branches/lucene6005/dev-tools/ (props changed)
lucene/dev/branches/lucene6005/dev-tools/scripts/createPatch.py
lucene/dev/branches/lucene6005/lucene/ (props changed)
lucene/dev/branches/lucene6005/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/lucene6005/lucene/analysis/ (props changed)
lucene/dev/branches/lucene6005/lucene/analysis/common/ (props changed)
lucene/dev/branches/lucene6005/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java
lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java
lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java
lucene/dev/branches/lucene6005/lucene/benchmark/ (props changed)
lucene/dev/branches/lucene6005/lucene/benchmark/build.xml
lucene/dev/branches/lucene6005/lucene/classification/ (props changed)
lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java
lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java
lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/package.html
lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/utils/DocToDoubleVectorUtils.java
lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java
lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java
lucene/dev/branches/lucene6005/lucene/core/ (props changed)
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/Sort.java
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java
lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReaderReopen.java
lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java
lucene/dev/branches/lucene6005/lucene/facet/ (props changed)
lucene/dev/branches/lucene6005/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java
lucene/dev/branches/lucene6005/lucene/highlighter/ (props changed)
lucene/dev/branches/lucene6005/lucene/highlighter/build.xml
lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
lucene/dev/branches/lucene6005/lucene/ivy-versions.properties (contents, props changed)
lucene/dev/branches/lucene6005/lucene/join/ (props changed)
lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
lucene/dev/branches/lucene6005/lucene/licenses/ (props changed)
lucene/dev/branches/lucene6005/lucene/suggest/ (props changed)
lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
lucene/dev/branches/lucene6005/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
lucene/dev/branches/lucene6005/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
lucene/dev/branches/lucene6005/lucene/test-framework/ (props changed)
lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java
lucene/dev/branches/lucene6005/solr/ (props changed)
lucene/dev/branches/lucene6005/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/lucene6005/solr/bin/ (props changed)
lucene/dev/branches/lucene6005/solr/bin/solr
lucene/dev/branches/lucene6005/solr/bin/solr.cmd (contents, props changed)
lucene/dev/branches/lucene6005/solr/bin/solr.in.cmd (contents, props changed)
lucene/dev/branches/lucene6005/solr/bin/solr.in.sh
lucene/dev/branches/lucene6005/solr/build.xml (contents, props changed)
lucene/dev/branches/lucene6005/solr/core/ (props changed)
lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/core/SolrCore.java
lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java
lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/spelling/suggest/fst/BlendedInfixLookupFactory.java
lucene/dev/branches/lucene6005/solr/licenses/ (props changed)
lucene/dev/branches/lucene6005/solr/webapp/ (props changed)
lucene/dev/branches/lucene6005/solr/webapp/web/css/styles/files.css
lucene/dev/branches/lucene6005/solr/webapp/web/js/scripts/files.js
lucene/dev/branches/lucene6005/solr/webapp/web/js/scripts/index.js
lucene/dev/branches/lucene6005/solr/webapp/web/js/scripts/plugins.js
Modified: lucene/dev/branches/lucene6005/dev-tools/scripts/createPatch.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/dev-tools/scripts/createPatch.py?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/dev-tools/scripts/createPatch.py (original)
+++ lucene/dev/branches/lucene6005/dev-tools/scripts/createPatch.py Mon Nov 17 08:47:34 2014
@@ -30,10 +30,11 @@ import os
import subprocess
import sys
-def make_filter_func(src_dir):
- if os.path.exists(os.path.join(src_dir, '.git')):
+def make_filter_func(src_root, src_dir):
+ git_root = os.path.join(src_root, '.git')
+ if os.path.exists(git_root):
def git_filter(filename):
- rc = subprocess.call('git --git-dir=%s check-ignore %s' % (src_dir, filename), shell=True)
+ rc = subprocess.call('git --git-dir=%s check-ignore %s' % (git_root, filename), shell=True, stdout=subprocess.DEVNULL)
return rc == 0
return git_filter
@@ -89,7 +90,7 @@ def run_diff(from_dir, to_dir, skip_whit
flags += 'bBw'
args = ['diff', flags]
- for ignore in ('.svn', '.git', 'build', '.caches'):
+ for ignore in ('.svn', '.git', 'build', '.caches', '.idea', 'idea-build'):
args.append('-x')
args.append(ignore)
args.append(from_dir)
@@ -97,6 +98,13 @@ def run_diff(from_dir, to_dir, skip_whit
return subprocess.Popen(args, shell=False, stdout=subprocess.PIPE)
+def find_root(path):
+ relative = []
+ while not os.path.exists(os.path.join(path, 'lucene', 'CHANGES.txt')):
+ path, base = os.path.split(path)
+ relative.insert(0, base)
+ return path, '' if not relative else os.path.normpath(os.path.join(*relative))
+
def parse_config():
parser = ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
parser.add_argument('--skip-whitespace', action='store_true', default=False,
@@ -107,20 +115,24 @@ def parse_config():
if not os.path.isdir(c.from_dir):
parser.error('\'from\' path %s is not a valid directory' % c.from_dir)
- if not os.path.exists(os.path.join(c.from_dir, 'lucene', 'CHANGES.txt')):
- parser.error('\'from\' path %s is not a valid lucene/solr checkout' % c.from_dir)
+ (c.from_root, from_relative) = find_root(c.from_dir)
+ if c.from_root is None:
+ parser.error('\'from\' path %s is not relative to a lucene/solr checkout' % c.from_dir)
if not os.path.isdir(c.to_dir):
parser.error('\'to\' path %s is not a valid directory' % c.to_dir)
- if not os.path.exists(os.path.join(c.to_dir, 'lucene', 'CHANGES.txt')):
- parser.error('\'to\' path %s is not a valid lucene/solr checkout' % c.to_dir)
-
+ (c.to_root, to_relative) = find_root(c.to_dir)
+ if c.to_root is None:
+ parser.error('\'to\' path %s is not relative to a lucene/solr checkout' % c.to_dir)
+ if from_relative != to_relative:
+ parser.error('\'from\' and \'to\' path are not equivalent relative paths within their'
+ ' checkouts: %r != %r' % (from_relative, to_relative))
return c
def main():
c = parse_config()
p = run_diff(c.from_dir, c.to_dir, c.skip_whitespace)
- should_filter = make_filter_func(c.from_dir)
+ should_filter = make_filter_func(c.from_root, c.from_dir)
print_filtered_output(p.stdout, should_filter)
if __name__ == '__main__':
Modified: lucene/dev/branches/lucene6005/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/CHANGES.txt?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene6005/lucene/CHANGES.txt Mon Nov 17 08:47:34 2014
@@ -77,6 +77,27 @@ New Features
improved exception handling, and indirect norms encoding for sparse fields.
(Mike McCandless, Ryan Ernst, Robert Muir)
+* LUCENE-6053: Add Serbian analyzer. (Nikola Smolenski via Robert Muir, Mike McCandless)
+
+* LUCENE-4400: Add support for new NYSIIS Apache commons phonetic
+ codec (Thomas Neidhart via Mike McCandless)
+
+* LUCENE-6059: Add support for the Daitch-Mokotoff Soundex Apache commons
+ phonetic codec, and upgrade to Apache commons codec 1.10. (Thomas
+ Neidhart via Mike McCandless)
+
+* LUCENE-6058: With the upgrade to Apache commons codec 1.10, the
+ experimental BeiderMorseFilter has changed its behavior, so any
+ index using it will need to be rebuilt. (Thomas
+ Neidhart via Mike McCandless)
+
+* LUCENE-6050: Accept MUST and MUST_NOT (in addition to SHOULD) for
+ each context passed to Analyzing/BlendedInfixSuggester (Arcadius
+ Ahouansou, jane chang via Mike McCandless)
+
+* LUCENE-5929: Also extract terms to highlight from block join
+ queries. (Julie Tibshirani via Mike McCandless)
+
API Changes
* LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and
@@ -195,6 +216,9 @@ Bug Fixes
not have the regular "spinlock" of DirectoryReader.open. It now implements
Closeable and you must close it to release the lock. (Mike McCandless, Robert Muir)
+* LUCENE-6004: Don't highlight the LookupResult.key returned from
+ AnalyzingInfixSuggester (Christian Reuschling, jane chang via Mike McCandless)
+
* LUCENE-5980: Don't let document length overflow. (Robert Muir)
* LUCENE-5961: Fix the exists() method for FunctionValues returned by many ValueSources to
@@ -228,12 +252,17 @@ Bug Fixes
* LUCENE-6055: PayloadAttribute.clone() now does a deep clone of the underlying
bytes. (Shai Erera)
+
+* LUCENE-6060: Remove dangerous IndexWriter.unlock method (Simon
+ Willnauer, Mike McCandless)
Documentation
* LUCENE-5392: Add/improve analysis package documentation to reflect
analysis API changes. (Benson Margulies via Robert Muir - pull request #17)
+* LUCENE-6057: Improve Sort(SortField) docs (Martin Braun via Mike McCandless)
+
Tests
* LUCENE-5957: Add option for tests to not randomize codec
@@ -284,6 +313,9 @@ Optimizations
* LUCENE-6040: Speed up EliasFanoDocIdSet through broadword bit selection.
(Paul Elschot)
+* LUCENE-6033: CachingTokenFilter now uses ArrayList not LinkedList, and has new
+ isCached() method. (David Smiley)
+
Build
* LUCENE-5909: Smoke tester now has better command line parsing and
@@ -306,6 +338,13 @@ Other
* LUCENE-5915: Remove Pulsing postings format. (Robert Muir)
+======================= Lucene 4.10.3 ======================
+
+Bug fixes
+
+* LUCENE-3229: Overlapping ordered SpanNearQuery spans should not match.
+ (Ludovic Boutros, Paul Elschot, Greg Dearing, ehatcher)
+
======================= Lucene 4.10.2 ======================
Bug fixes
Modified: lucene/dev/branches/lucene6005/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (original)
+++ lucene/dev/branches/lucene6005/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory Mon Nov 17 08:47:34 2014
@@ -91,6 +91,7 @@ org.apache.lucene.analysis.reverse.Rever
org.apache.lucene.analysis.ru.RussianLightStemFilterFactory
org.apache.lucene.analysis.shingle.ShingleFilterFactory
org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory
+org.apache.lucene.analysis.sr.SerbianNormalizationFilterFactory
org.apache.lucene.analysis.standard.ClassicFilterFactory
org.apache.lucene.analysis.standard.StandardFilterFactory
org.apache.lucene.analysis.sv.SwedishLightStemFilterFactory
Modified: lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java Mon Nov 17 08:47:34 2014
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.phone
*/
import java.io.IOException;
-import java.lang.reflect.Method;
import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
@@ -29,6 +29,7 @@ import org.apache.commons.codec.language
import org.apache.commons.codec.language.ColognePhonetic;
import org.apache.commons.codec.language.DoubleMetaphone;
import org.apache.commons.codec.language.Metaphone;
+import org.apache.commons.codec.language.Nysiis;
import org.apache.commons.codec.language.RefinedSoundex;
import org.apache.commons.codec.language.Soundex;
import org.apache.lucene.analysis.TokenStream;
@@ -46,8 +47,8 @@ import org.apache.lucene.analysis.util.T
* This takes one required argument, "encoder", and the rest are optional:
* <dl>
* <dt>encoder</dt><dd> required, one of "DoubleMetaphone", "Metaphone", "Soundex", "RefinedSoundex", "Caverphone" (v2.0),
- * or "ColognePhonetic" (case insensitive). If encoder isn't one of these, it'll be resolved as a class name either by
- * itself if it already contains a '.' or otherwise as in the same package as these others.</dd>
+ * "ColognePhonetic" or "Nysiis" (case insensitive). If encoder isn't one of these, it'll be resolved as a class name
+ * either by itself if it already contains a '.' or otherwise as in the same package as these others.</dd>
* <dt>inject</dt><dd> (default=true) add tokens to the stream with the offset=0</dd>
* <dt>maxCodeLength</dt><dd>The maximum length of the phonetic codes, as defined by the encoder. If an encoder doesn't
* support this then specifying this is an error.</dd>
@@ -82,6 +83,7 @@ public class PhoneticFilterFactory exten
registry.put("RefinedSoundex".toUpperCase(Locale.ROOT), RefinedSoundex.class);
registry.put("Caverphone".toUpperCase(Locale.ROOT), Caverphone2.class);
registry.put("ColognePhonetic".toUpperCase(Locale.ROOT), ColognePhonetic.class);
+ registry.put("Nysiis".toUpperCase(Locale.ROOT), Nysiis.class);
}
final boolean inject; //accessed by the test
Modified: lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java Mon Nov 17 08:47:34 2014
@@ -17,12 +17,10 @@ package org.apache.lucene.analysis.phone
* limitations under the License.
*/
-import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
/** Simple tests for {@link BeiderMorseFilterFactory} */
@@ -31,10 +29,10 @@ public class TestBeiderMorseFilterFactor
BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory(new HashMap<String,String>());
TokenStream ts = factory.create(whitespaceMockTokenizer("Weinberg"));
assertTokenStreamContents(ts,
- new String[] { "vDnbirk", "vanbirk", "vinbirk", "wDnbirk", "wanbirk", "winbirk" },
- new int[] { 0, 0, 0, 0, 0, 0 },
- new int[] { 8, 8, 8, 8, 8, 8 },
- new int[] { 1, 0, 0, 0, 0, 0 });
+ new String[] { "vDnbYrk", "vDnbirk", "vanbYrk", "vanbirk", "vinbYrk", "vinbirk", "wDnbirk", "wanbirk", "winbirk" },
+ new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0},
+ new int[] { 8, 8, 8, 8, 8, 8, 8, 8, 8},
+ new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0});
}
public void testLanguageSet() throws Exception {
Modified: lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java Mon Nov 17 08:47:34 2014
@@ -21,7 +21,12 @@ import java.io.IOException;
import java.io.StringReader;
import org.apache.commons.codec.Encoder;
-import org.apache.commons.codec.language.*;
+import org.apache.commons.codec.language.Caverphone2;
+import org.apache.commons.codec.language.DoubleMetaphone;
+import org.apache.commons.codec.language.Metaphone;
+import org.apache.commons.codec.language.Nysiis;
+import org.apache.commons.codec.language.RefinedSoundex;
+import org.apache.commons.codec.language.Soundex;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
@@ -59,6 +64,11 @@ public class TestPhoneticFilter extends
"TTA1111111", "Datha", "KLN1111111", "Carlene" });
assertAlgorithm(new Caverphone2(), false, "Darda Karleen Datha Carlene",
new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });
+
+ assertAlgorithm(new Nysiis(), true, "aaa bbb ccc easgasg",
+ new String[] { "A", "aaa", "B", "bbb", "C", "ccc", "EASGAS", "easgasg" });
+ assertAlgorithm(new Nysiis(), false, "aaa bbb ccc easgasg",
+ new String[] { "A", "B", "C", "EASGAS" });
}
Modified: lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java Mon Nov 17 08:47:34 2014
@@ -18,14 +18,12 @@ package org.apache.lucene.analysis.phone
*/
import java.io.IOException;
-import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
-import org.apache.commons.codec.language.Metaphone;
import org.apache.commons.codec.language.Caverphone2;
+import org.apache.commons.codec.language.Metaphone;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
@@ -164,6 +162,12 @@ public class TestPhoneticFilterFactory e
"67", "Meir", "862", "Schmidt" });
assertAlgorithm("ColognePhonetic", "false", "Meier Schmitt Meir Schmidt",
new String[] { "67", "862", "67", "862" });
+
+ assertAlgorithm("Nysiis", "true", "Macintosh Knuth Bart Hurd",
+ new String[] { "MCANT", "Macintosh", "NAT", "Knuth",
+ "BAD", "Bart", "HAD", "Hurd" });
+ assertAlgorithm("Nysiis", "false", "Macintosh Knuth Bart Hurd",
+ new String[] { "MCANT", "NAT", "BAD", "HAD" });
}
/** Test that bogus arguments result in exception */
Modified: lucene/dev/branches/lucene6005/lucene/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/benchmark/build.xml?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/benchmark/build.xml (original)
+++ lucene/dev/branches/lucene6005/lucene/benchmark/build.xml Mon Nov 17 08:47:34 2014
@@ -174,6 +174,7 @@
<pathelement path="${spatial.jar}"/>
<pathelement path="${queries.jar}"/>
<pathelement path="${codecs.jar}"/>
+ <pathelement path="${join.jar}"/>
<path refid="base.classpath"/>
<fileset dir="lib"/>
</path>
@@ -276,7 +277,7 @@
<echo>Benchmark output in JIRA table format is in file: ${shingle.jira.output.file}</echo>
</target>
- <target name="init" depends="module-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet,jar-spatial,jar-codecs"/>
+ <target name="init" depends="module-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet,jar-spatial,jar-codecs,jar-join"/>
<target name="compile-test" depends="copy-alg-files-for-testing,module-build.compile-test"/>
<target name="copy-alg-files-for-testing" description="copy .alg files as resources for testing">
Modified: lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java Mon Nov 17 08:47:34 2014
@@ -20,7 +20,7 @@ import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
-import java.util.TreeMap;
+import java.util.concurrent.ConcurrentSkipListMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@@ -53,7 +53,7 @@ import org.apache.lucene.util.fst.Util;
* {@link org.apache.lucene.index.TermsEnum#totalTermFreq} both on a per field
* and a per document basis and then a corresponding
* {@link org.apache.lucene.util.fst.FST} is used for class assignment.
- *
+ *
* @lucene.experimental
*/
public class BooleanPerceptronClassifier implements Classifier<Boolean> {
@@ -67,9 +67,8 @@ public class BooleanPerceptronClassifier
/**
* Create a {@link BooleanPerceptronClassifier}
- *
- * @param threshold
- * the binary threshold for perceptron output evaluation
+ *
+ * @param threshold the binary threshold for perceptron output evaluation
*/
public BooleanPerceptronClassifier(Double threshold, Integer batchSize) {
this.threshold = threshold;
@@ -98,7 +97,7 @@ public class BooleanPerceptronClassifier
Long output = 0l;
try (TokenStream tokenStream = analyzer.tokenStream(textFieldName, text)) {
CharTermAttribute charTermAttribute = tokenStream
- .addAttribute(CharTermAttribute.class);
+ .addAttribute(CharTermAttribute.class);
tokenStream.reset();
while (tokenStream.incrementToken()) {
String s = charTermAttribute.toString();
@@ -110,7 +109,8 @@ public class BooleanPerceptronClassifier
tokenStream.end();
}
- return new ClassificationResult<>(output >= threshold, output.doubleValue());
+ double score = 1 - Math.exp(-1 * Math.abs(threshold - output.doubleValue()) / threshold);
+ return new ClassificationResult<>(output >= threshold, score);
}
/**
@@ -127,7 +127,7 @@ public class BooleanPerceptronClassifier
*/
@Override
public void train(LeafReader leafReader, String textFieldName,
- String classFieldName, Analyzer analyzer, Query query) throws IOException {
+ String classFieldName, Analyzer analyzer, Query query) throws IOException {
this.textTerms = MultiFields.getTerms(leafReader, textFieldName);
if (textTerms == null) {
@@ -150,7 +150,7 @@ public class BooleanPerceptronClassifier
}
// TODO : remove this map as soon as we have a writable FST
- SortedMap<String,Double> weights = new TreeMap<>();
+ SortedMap<String, Double> weights = new ConcurrentSkipListMap<>();
TermsEnum reuse = textTerms.iterator(null);
BytesRef textTerm;
@@ -177,10 +177,10 @@ public class BooleanPerceptronClassifier
ClassificationResult<Boolean> classificationResult = assignClass(doc
.getField(textFieldName).stringValue());
Boolean assignedClass = classificationResult.getAssignedClass();
-
+
// get the expected result
IndexableField field = doc.getField(classFieldName);
-
+
Boolean correctClass = Boolean.valueOf(field.stringValue());
long modifier = correctClass.compareTo(assignedClass);
if (modifier != 0) {
@@ -198,8 +198,8 @@ public class BooleanPerceptronClassifier
}
private TermsEnum updateWeights(LeafReader leafReader, TermsEnum reuse,
- int docId, Boolean assignedClass, SortedMap<String,Double> weights,
- double modifier, boolean updateFST) throws IOException {
+ int docId, Boolean assignedClass, SortedMap<String, Double> weights,
+ double modifier, boolean updateFST) throws IOException {
TermsEnum cte = textTerms.iterator(reuse);
// get the doc term vectors
@@ -231,12 +231,12 @@ public class BooleanPerceptronClassifier
return reuse;
}
- private void updateFST(SortedMap<String,Double> weights) throws IOException {
+ private void updateFST(SortedMap<String, Double> weights) throws IOException {
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
BytesRefBuilder scratchBytes = new BytesRefBuilder();
IntsRefBuilder scratchInts = new IntsRefBuilder();
- for (Map.Entry<String,Double> entry : weights.entrySet()) {
+ for (Map.Entry<String, Double> entry : weights.entrySet()) {
scratchBytes.copyChars(entry.getKey());
fstBuilder.add(Util.toIntsRef(scratchBytes.get(), scratchInts), entry
.getValue().longValue());
Modified: lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java Mon Nov 17 08:47:34 2014
@@ -16,6 +16,14 @@
*/
package org.apache.lucene.classification;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Term;
@@ -29,14 +37,6 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
/**
* A k-Nearest Neighbor classifier (see <code>http://en.wikipedia.org/wiki/K-nearest_neighbors</code>) based
* on {@link MoreLikeThis}
@@ -82,14 +82,14 @@ public class KNearestNeighborClassifier
*/
@Override
public ClassificationResult<BytesRef> assignClass(String text) throws IOException {
- TopDocs topDocs=knnSearcher(text);
- List<ClassificationResult<BytesRef>> doclist=buildListFromTopDocs(topDocs);
- ClassificationResult<BytesRef> retval=null;
- double maxscore=-Double.MAX_VALUE;
- for(ClassificationResult<BytesRef> element:doclist){
- if(element.getScore()>maxscore){
- retval=element;
- maxscore=element.getScore();
+ TopDocs topDocs = knnSearch(text);
+ List<ClassificationResult<BytesRef>> doclist = buildListFromTopDocs(topDocs);
+ ClassificationResult<BytesRef> retval = null;
+ double maxscore = -Double.MAX_VALUE;
+ for (ClassificationResult<BytesRef> element : doclist) {
+ if (element.getScore() > maxscore) {
+ retval = element;
+ maxscore = element.getScore();
}
}
return retval;
@@ -100,24 +100,24 @@ public class KNearestNeighborClassifier
*/
@Override
public List<ClassificationResult<BytesRef>> getClasses(String text) throws IOException {
- TopDocs topDocs=knnSearcher(text);
- List<ClassificationResult<BytesRef>> doclist=buildListFromTopDocs(topDocs);
+ TopDocs topDocs = knnSearch(text);
+ List<ClassificationResult<BytesRef>> doclist = buildListFromTopDocs(topDocs);
Collections.sort(doclist);
return doclist;
}
-
+
/**
* {@inheritDoc}
*/
@Override
public List<ClassificationResult<BytesRef>> getClasses(String text, int max) throws IOException {
- TopDocs topDocs=knnSearcher(text);
- List<ClassificationResult<BytesRef>> doclist=buildListFromTopDocs(topDocs);
+ TopDocs topDocs = knnSearch(text);
+ List<ClassificationResult<BytesRef>> doclist = buildListFromTopDocs(topDocs);
Collections.sort(doclist);
return doclist.subList(0, max);
}
- private TopDocs knnSearcher(String text) throws IOException{
+ private TopDocs knnSearch(String text) throws IOException {
if (mlt == null) {
throw new IOException("You must first call Classifier#train");
}
@@ -132,31 +132,30 @@ public class KNearestNeighborClassifier
}
return indexSearcher.search(mltQuery, k);
}
-
+
private List<ClassificationResult<BytesRef>> buildListFromTopDocs(TopDocs topDocs) throws IOException {
Map<BytesRef, Integer> classCounts = new HashMap<>();
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
- BytesRef cl = new BytesRef(indexSearcher.doc(scoreDoc.doc).getField(classFieldName).stringValue());
- Integer count = classCounts.get(cl);
- if (count != null) {
- classCounts.put(cl, count + 1);
- } else {
- classCounts.put(cl, 1);
- }
+ BytesRef cl = new BytesRef(indexSearcher.doc(scoreDoc.doc).getField(classFieldName).stringValue());
+ Integer count = classCounts.get(cl);
+ if (count != null) {
+ classCounts.put(cl, count + 1);
+ } else {
+ classCounts.put(cl, 1);
+ }
}
List<ClassificationResult<BytesRef>> returnList = new ArrayList<>();
- int sumdoc=0;
+ int sumdoc = 0;
for (Map.Entry<BytesRef, Integer> entry : classCounts.entrySet()) {
- Integer count = entry.getValue();
- returnList.add(new ClassificationResult<>(entry.getKey().clone(), count / (double) k));
- sumdoc+=count;
-
+ Integer count = entry.getValue();
+ returnList.add(new ClassificationResult<>(entry.getKey().clone(), count / (double) k));
+ sumdoc += count;
}
-
+
//correction
- if(sumdoc<k){
- for(ClassificationResult<BytesRef> cr:returnList){
- cr.setScore(cr.getScore()*(double)k/(double)sumdoc);
+ if (sumdoc < k) {
+ for (ClassificationResult<BytesRef> cr : returnList) {
+ cr.setScore(cr.getScore() * (double) k / (double) sumdoc);
}
}
return returnList;
Modified: lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/package.html?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/package.html (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/package.html Mon Nov 17 08:47:34 2014
@@ -17,7 +17,6 @@
<html>
<body>
Uses already seen data (the indexed documents) to classify new documents.
-Currently only contains a (simplistic) Lucene based Naive Bayes classifier,
-a k-Nearest Neighbor classifier and a Perceptron based classifier
+Currently contains a (simplistic) Naive Bayes classifier, a k-Nearest Neighbor classifier and a Perceptron-based classifier.
</body>
</html>
Modified: lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/utils/DocToDoubleVectorUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/utils/DocToDoubleVectorUtils.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/utils/DocToDoubleVectorUtils.java (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/utils/DocToDoubleVectorUtils.java Mon Nov 17 08:47:34 2014
@@ -16,12 +16,12 @@
*/
package org.apache.lucene.classification.utils;
+import java.io.IOException;
+
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
-import java.io.IOException;
-
/**
* utility class for converting Lucene {@link org.apache.lucene.document.Document}s to <code>Double</code> vectors.
*/
Modified: lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java Mon Nov 17 08:47:34 2014
@@ -91,7 +91,8 @@ public abstract class ClassificationTest
ClassificationResult<T> classificationResult = classifier.assignClass(inputDoc);
assertNotNull(classificationResult.getAssignedClass());
assertEquals("got an assigned class of " + classificationResult.getAssignedClass(), expectedResult, classificationResult.getAssignedClass());
- assertTrue("got a not positive score " + classificationResult.getScore(), classificationResult.getScore() > 0);
+ double score = classificationResult.getScore();
+ assertTrue("score should be between 0 and 1, got:" + score, score <= 1 && score >= 0);
} finally {
if (leafReader != null)
leafReader.close();
@@ -110,11 +111,12 @@ public abstract class ClassificationTest
ClassificationResult<T> classificationResult = classifier.assignClass(inputDoc);
assertNotNull(classificationResult.getAssignedClass());
assertEquals("got an assigned class of " + classificationResult.getAssignedClass(), expectedResult, classificationResult.getAssignedClass());
- assertTrue("got a not positive score " + classificationResult.getScore(), classificationResult.getScore() > 0);
+ double score = classificationResult.getScore();
+ assertTrue("score should be between 0 and 1, got: " + score, score <= 1 && score >= 0);
updateSampleIndex(analyzer);
ClassificationResult<T> secondClassificationResult = classifier.assignClass(inputDoc);
assertEquals(classificationResult.getAssignedClass(), secondClassificationResult.getAssignedClass());
- assertEquals(Double.valueOf(classificationResult.getScore()), Double.valueOf(secondClassificationResult.getScore()));
+ assertEquals(Double.valueOf(score), Double.valueOf(secondClassificationResult.getScore()));
} finally {
if (leafReader != null)
Modified: lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java Mon Nov 17 08:47:34 2014
@@ -33,7 +33,6 @@ import java.io.Reader;
/**
* Testcase for {@link SimpleNaiveBayesClassifier}
*/
-// TODO : eventually remove this if / when fallback methods exist for all un-supportable codec methods (see LUCENE-4872)
public class SimpleNaiveBayesClassifierTest extends ClassificationTestBase<BytesRef> {
@Test
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java Mon Nov 17 08:47:34 2014
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis;
*/
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Iterator;
-import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.util.AttributeSource;
@@ -27,7 +27,8 @@ import org.apache.lucene.util.AttributeS
/**
* This class can be used if the token attributes of a TokenStream
* are intended to be consumed more than once. It caches
- * all token attribute states locally in a List.
+ * all token attribute states locally in a List on the first call to
+ * {@link #incrementToken()}.
*
* <P>CachingTokenFilter implements the optional method
* {@link TokenStream#reset()}, which repositions the
@@ -51,7 +52,7 @@ public final class CachingTokenFilter ex
public final boolean incrementToken() throws IOException {
if (cache == null) {
// fill cache lazily
- cache = new LinkedList<>();
+ cache = new ArrayList<>(64);
fillCache();
iterator = cache.iterator();
}
@@ -81,13 +82,13 @@ public final class CachingTokenFilter ex
*/
@Override
public void reset() {
- if(cache != null) {
+ if (cache != null) {
iterator = cache.iterator();
}
}
private void fillCache() throws IOException {
- while(input.incrementToken()) {
+ while (input.incrementToken()) {
cache.add(captureState());
}
// capture final state
@@ -95,4 +96,9 @@ public final class CachingTokenFilter ex
finalState = captureState();
}
+ /** Returns true if the underlying token stream was consumed and cached. */
+ public boolean isCached() {
+ return cache != null;
+ }
+
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Mon Nov 17 08:47:34 2014
@@ -4408,17 +4408,6 @@ public class IndexWriter implements Clos
return directory.makeLock(WRITE_LOCK_NAME).isLocked();
}
- /**
- * Forcibly unlocks the index in the named directory.
- * <P>
- * Caution: this should only be used by failure recovery code,
- * when it is known that no other process nor thread is in fact
- * currently accessing this index.
- */
- public static void unlock(Directory directory) throws IOException {
- directory.makeLock(IndexWriter.WRITE_LOCK_NAME).close();
- }
-
/** If {@link DirectoryReader#open(IndexWriter,boolean)} has
* been called (ie, this writer is in near real-time
* mode), then after a merge completes, this class can be
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/Sort.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/Sort.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/Sort.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/Sort.java Mon Nov 17 08:47:34 2014
@@ -127,7 +127,11 @@ public class Sort {
setSort(field);
}
- /** Sorts in succession by the criteria in each SortField. */
+ /** Sets the sort to the given criteria in succession: the
+ * first SortField is checked first, but if it produces a
+ * tie, then the second SortField is used to break the tie,
+ * etc. Finally, if there is still a tie after all SortFields
+ * are checked, the internal Lucene docid is used to break it. */
public Sort(SortField... fields) {
setSort(fields);
}
@@ -137,7 +141,11 @@ public class Sort {
this.fields = new SortField[] { field };
}
- /** Sets the sort to the given criteria in succession. */
+ /** Sets the sort to the given criteria in succession: the
+ * first SortField is checked first, but if it produces a
+ * tie, then the second SortField is used to break the tie,
+ * etc. Finally, if there is still a tie after all SortFields
+ * are checked, the internal Lucene docid is used to break it. */
public void setSort(SortField... fields) {
this.fields = fields;
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java Mon Nov 17 08:47:34 2014
@@ -234,24 +234,23 @@ public class NearSpansOrdered extends Sp
return true;
}
- /** Check whether two Spans in the same document are ordered.
- * @return true iff spans1 starts before spans2
- * or the spans start at the same position,
- * and spans1 ends before spans2.
+ /** Check whether two Spans in the same document are ordered and not overlapping.
+ * @return false iff spans2's start position is smaller than spans1's end position
*/
- static final boolean docSpansOrdered(Spans spans1, Spans spans2) {
+ static final boolean docSpansOrderedNonOverlap(Spans spans1, Spans spans2) {
assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
- int start1 = spans1.start();
- int start2 = spans2.start();
- /* Do not call docSpansOrdered(int,int,int,int) to avoid invoking .end() : */
- return (start1 == start2) ? (spans1.end() < spans2.end()) : (start1 < start2);
+ assert spans1.start() < spans1.end();
+ assert spans2.start() < spans2.end();
+ return spans1.end() <= spans2.start();
}
- /** Like {@link #docSpansOrdered(Spans,Spans)}, but use the spans
+ /** Like {@link #docSpansOrderedNonOverlap(Spans,Spans)}, but use the spans
* starts and ends as parameters.
*/
- private static final boolean docSpansOrdered(int start1, int end1, int start2, int end2) {
- return (start1 == start2) ? (end1 < end2) : (start1 < start2);
+ private static final boolean docSpansOrderedNonOverlap(int start1, int end1, int start2, int end2) {
+ assert start1 < end1;
+ assert start2 < end2;
+ return end1 <= start2;
}
/** Order the subSpans within the same document by advancing all later spans
@@ -260,7 +259,7 @@ public class NearSpansOrdered extends Sp
private boolean stretchToOrder() throws IOException {
matchDoc = subSpans[0].doc();
for (int i = 1; inSameDoc && (i < subSpans.length); i++) {
- while (! docSpansOrdered(subSpans[i-1], subSpans[i])) {
+ while (! docSpansOrderedNonOverlap(subSpans[i-1], subSpans[i])) {
if (! subSpans[i].next()) {
inSameDoc = false;
more = false;
@@ -312,7 +311,7 @@ public class NearSpansOrdered extends Sp
} else {
int ppStart = prevSpans.start();
int ppEnd = prevSpans.end(); // Cannot avoid invoking .end()
- if (! docSpansOrdered(ppStart, ppEnd, lastStart, lastEnd)) {
+ if (! docSpansOrderedNonOverlap(ppStart, ppEnd, lastStart, lastEnd)) {
break; // Check remaining subSpans.
} else { // prevSpans still before (lastStart, lastEnd)
prevStart = ppStart;
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java Mon Nov 17 08:47:34 2014
@@ -63,7 +63,7 @@ public class NearSpansUnordered extends
@Override
protected final boolean lessThan(SpansCell spans1, SpansCell spans2) {
if (spans1.doc() == spans2.doc()) {
- return NearSpansOrdered.docSpansOrdered(spans1, spans2);
+ return docSpansOrdered(spans1, spans2);
} else {
return spans1.doc() < spans2.doc();
}
@@ -233,6 +233,18 @@ public class NearSpansUnordered extends
return more && (atMatch() || next());
}
+ /** Check whether two Spans in the same document are ordered with possible overlap.
+ * @return true iff spans1 starts before spans2
+ * or the spans start at the same position,
+ * and spans1 ends before spans2.
+ */
+ static final boolean docSpansOrdered(Spans spans1, Spans spans2) {
+ assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
+ int start1 = spans1.start();
+ int start2 = spans2.start();
+ return (start1 == start2) ? (spans1.end() < spans2.end()) : (start1 < start2);
+ }
+
private SpansCell min() { return queue.top(); }
@Override
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java Mon Nov 17 08:47:34 2014
@@ -48,12 +48,15 @@ public class SpanNearQuery extends SpanQ
/** Construct a SpanNearQuery. Matches spans matching a span from each
* clause, with up to <code>slop</code> total unmatched positions between
- * them. * When <code>inOrder</code> is true, the spans from each clause
- * must be * ordered as in <code>clauses</code>.
+ * them.
+ * <br>When <code>inOrder</code> is true, the spans from each clause
+ * must be in the same order as in <code>clauses</code> and must be non-overlapping.
+ * <br>When <code>inOrder</code> is false, the spans from each clause
+ * need not be ordered and may overlap.
* @param clauses the clauses to find near each other
* @param slop The slop value
* @param inOrder true if order is important
- * */
+ */
public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) {
this(clauses, slop, inOrder, true);
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java Mon Nov 17 08:47:34 2014
@@ -37,12 +37,10 @@ import java.nio.file.Path;
* <p>When this happens, a {@link LockObtainFailedException}
* is hit when trying to create a writer, in which case you
- * need to explicitly clear the lock file first. You can
- * either manually remove the file, or use the {@link
- * org.apache.lucene.index.IndexWriter#unlock(Directory)}
- * API. But, first be certain that no writer is in fact
- * writing to the index otherwise you can easily corrupt
- * your index.</p>
+ * need to explicitly clear the lock file first by
+ * manually removing the file. But, first be certain that
+ * no writer is in fact writing to the index otherwise you
+ * can easily corrupt your index.</p>
*
* <p>Special care needs to be taken if you change the locking
* implementation: First be certain that no writer is in fact
Modified: lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReaderReopen.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReaderReopen.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReaderReopen.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReaderReopen.java Mon Nov 17 08:47:34 2014
@@ -16,6 +16,7 @@ package org.apache.lucene.index;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
@@ -28,7 +29,6 @@ import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document2;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -41,12 +41,14 @@ import org.apache.lucene.search.IndexSea
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.MockDirectoryWrapper.FakeIOException;
import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.store.MockDirectoryWrapper.FakeIOException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
+
+
public class TestDirectoryReaderReopen extends LuceneTestCase {
public void testReopen() throws Exception {
@@ -429,7 +431,6 @@ public class TestDirectoryReaderReopen e
}
public static void createIndex(Random random, Directory dir, boolean multiSegment) throws IOException {
- IndexWriter.unlock(dir);
IndexWriter w = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, new MockAnalyzer(random))
.setMergePolicy(new LogDocMergePolicy()));
FieldTypes fieldTypes = w.getFieldTypes();
Modified: lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java Mon Nov 17 08:47:34 2014
@@ -81,6 +81,22 @@ public class TestNearSpansOrdered extend
protected SpanNearQuery makeQuery() {
return makeQuery("w1","w2","w3",1,true);
}
+
+ protected SpanNearQuery makeOverlappedQuery(
+ String sqt1, String sqt2, boolean sqOrdered,
+ String t3, boolean ordered) {
+ return new SpanNearQuery(
+ new SpanQuery[] {
+ new SpanNearQuery(new SpanQuery[] {
+ new SpanTermQuery(new Term(FIELD, sqt1)),
+ new SpanTermQuery(new Term(FIELD, sqt2)) },
+ 1,
+ sqOrdered
+ ),
+ new SpanTermQuery(new Term(FIELD, t3)) },
+ 0,
+ ordered);
+ }
public void testSpanNearQuery() throws Exception {
SpanNearQuery q = makeQuery();
@@ -169,6 +185,22 @@ public class TestNearSpansOrdered extend
Scorer s = w.scorer(leave, leave.reader().getLiveDocs());
assertEquals(1, s.advance(1));
}
+
+ public void testOverlappedOrderedSpan() throws Exception {
+ SpanNearQuery q = makeOverlappedQuery("w5", "w3", false, "w4", true);
+ CheckHits.checkHits(random(), q, FIELD, searcher, new int[] {});
+ }
+
+ public void testOverlappedNonOrderedSpan() throws Exception {
+ SpanNearQuery q = makeOverlappedQuery("w3", "w5", true, "w4", false);
+ CheckHits.checkHits(random(), q, FIELD, searcher, new int[] {0});
+ }
+
+ public void testNonOverlappedOrderedSpan() throws Exception {
+ SpanNearQuery q = makeOverlappedQuery("w3", "w4", true, "w5", true);
+ CheckHits.checkHits(random(), q, FIELD, searcher, new int[] {0});
+ }
+
/**
* not a direct test of NearSpans, but a demonstration of how/when
@@ -181,5 +213,4 @@ public class TestNearSpansOrdered extend
+ e.toString(),
0.0f < e.getValue());
}
-
}
Modified: lucene/dev/branches/lucene6005/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java Mon Nov 17 08:47:34 2014
@@ -23,18 +23,18 @@ import org.apache.lucene.facet.FacetsCon
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
-import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.Cl2oTaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
import org.apache.lucene.index.CorruptIndexException; // javadocs
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.ReaderManager;
import org.apache.lucene.index.SegmentInfos;
@@ -44,8 +44,6 @@ import org.apache.lucene.index.TieredMer
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException; // javadocs
-import org.apache.lucene.store.NativeFSLockFactory;
-import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.lucene.util.BytesRef;
/*
@@ -136,22 +134,6 @@ public class DirectoryTaxonomyWriter imp
}
/**
- * Forcibly unlocks the taxonomy in the named directory.
- * <P>
- * Caution: this should only be used by failure recovery code, when it is
- * known that no other process nor thread is in fact currently accessing
- * this taxonomy.
- * <P>
- * This method is unnecessary if your {@link Directory} uses a
- * {@link NativeFSLockFactory} instead of the default
- * {@link SimpleFSLockFactory}. When the "native" lock is used, a lock
- * does not stay behind forever when the process using it dies.
- */
- public static void unlock(Directory directory) throws IOException {
- IndexWriter.unlock(directory);
- }
-
- /**
* Construct a Taxonomy writer.
*
* @param directory
@@ -173,10 +155,7 @@ public class DirectoryTaxonomyWriter imp
* @throws CorruptIndexException
* if the taxonomy is corrupted.
* @throws LockObtainFailedException
- * if the taxonomy is locked by another writer. If it is known
- * that no other concurrent writer is active, the lock might
- * have been left around by an old dead process, and should be
- * removed using {@link #unlock(Directory)}.
+ * if the taxonomy is locked by another writer.
* @throws IOException
* if another error occurred.
*/
Modified: lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java (original)
+++ lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java Mon Nov 17 08:47:34 2014
@@ -12,8 +12,6 @@ import org.apache.lucene.facet.SlowRAMDi
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.LockObtainFailedException;
-import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.junit.Test;
@@ -916,47 +914,6 @@ public class TestTaxonomyCombined extend
}
/**
- * Test what happens if we try to write to a locked taxonomy writer,
- * and see that we can unlock it and continue.
- */
- @Test
- public void testWriterLock() throws Exception {
- // native fslock impl gets angry if we use it, so use RAMDirectory explicitly.
- Directory indexDir = new RAMDirectory();
- TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
- tw.addCategory(new FacetLabel("hi", "there"));
- tw.commit();
- // we deliberately not close the write now, and keep it open and
- // locked.
- // Verify that the writer worked:
- TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
- assertEquals(2, tr.getOrdinal(new FacetLabel("hi", "there")));
- // Try to open a second writer, with the first one locking the directory.
- // We expect to get a LockObtainFailedException.
- try {
- assertNull(new DirectoryTaxonomyWriter(indexDir));
- fail("should have failed to write in locked directory");
- } catch (LockObtainFailedException e) {
- // this is what we expect to happen.
- }
- // Remove the lock, and now the open should succeed, and we can
- // write to the new writer.
- DirectoryTaxonomyWriter.unlock(indexDir);
- TaxonomyWriter tw2 = new DirectoryTaxonomyWriter(indexDir);
- tw2.addCategory(new FacetLabel("hey"));
- tw2.close();
- // See that the writer indeed wrote:
- TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
- assertNotNull(newtr);
- tr.close();
- tr = newtr;
- assertEquals(3, tr.getOrdinal(new FacetLabel("hey")));
- tr.close();
- tw.close();
- indexDir.close();
- }
-
- /**
* fillTaxonomyCheckPaths adds the categories in the categories[] array,
* and asserts that the additions return exactly paths specified in
* expectedPaths[]. This is the same as fillTaxonomy() but also checks
Modified: lucene/dev/branches/lucene6005/lucene/highlighter/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/highlighter/build.xml?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/highlighter/build.xml (original)
+++ lucene/dev/branches/lucene6005/lucene/highlighter/build.xml Mon Nov 17 08:47:34 2014
@@ -31,10 +31,13 @@
<path id="classpath">
<pathelement path="${memory.jar}"/>
<pathelement path="${queries.jar}"/>
+ <pathelement path="${join.jar}"/>
<path refid="base.classpath"/>
</path>
- <target name="compile-core" depends="jar-memory, common.compile-core" />
+ <target name="init" depends="module-build.init,jar-memory,jar-queries,jar-join"/>
+
+ <target name="compile-core" depends="jar-memory, common.compile-core, jar-join" />
<target name="javadocs" depends="javadocs-memory,compile-core,check-javadocs-uptodate"
unless="javadocs-uptodate-${name}">
<invoke-module-javadoc>
Modified: lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Mon Nov 17 08:47:34 2014
@@ -44,6 +44,8 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.*;
+import org.apache.lucene.search.join.ToChildBlockJoinQuery;
+import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanFirstQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
@@ -154,6 +156,10 @@ public class WeightedSpanTermExtractor {
for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
extract(iterator.next(), terms);
}
+ } else if (query instanceof ToParentBlockJoinQuery) {
+ extract(((ToParentBlockJoinQuery) query).getChildQuery(), terms);
+ } else if (query instanceof ToChildBlockJoinQuery) {
+ extract(((ToChildBlockJoinQuery) query).getParentQuery(), terms);
} else if (query instanceof MultiPhraseQuery) {
final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
final List<Term[]> termArrays = mpq.getTermArrays();
Modified: lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Mon Nov 17 08:47:34 2014
@@ -21,6 +21,7 @@ import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@@ -49,6 +50,11 @@ import org.apache.lucene.queries.CommonT
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
+import org.apache.lucene.search.join.BitDocIdSetCachingWrapperFilter;
+import org.apache.lucene.search.join.BitDocIdSetFilter;
+import org.apache.lucene.search.join.ScoreMode;
+import org.apache.lucene.search.join.ToChildBlockJoinQuery;
+import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.spans.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -514,6 +520,62 @@ public class HighlighterTest extends Bas
}
+
+ public void testToParentBlockJoinQuery() throws Exception {
+ BitDocIdSetFilter parentFilter = new BitDocIdSetCachingWrapperFilter(
+ new QueryWrapperFilter(
+ new TermQuery(new Term(FIELD_NAME, "parent"))));
+
+ query = new ToParentBlockJoinQuery(new TermQuery(new Term(FIELD_NAME, "child")),
+ parentFilter, ScoreMode.None);
+ searcher = newSearcher(reader);
+ hits = searcher.search(query, 100);
+ int maxNumFragmentsRequired = 2;
+
+ QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+ Highlighter highlighter = new Highlighter(this, scorer);
+
+ for (int i = 0; i < hits.totalHits; i++) {
+ String text = "child document";
+ TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+
+ highlighter.setTextFragmenter(new SimpleFragmenter(40));
+ highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
+ }
+
+ assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+ numHighlights == 1);
+ }
+
+ public void testToChildBlockJoinQuery() throws Exception {
+ BitDocIdSetFilter parentFilter = new BitDocIdSetCachingWrapperFilter(
+ new QueryWrapperFilter(
+ new TermQuery(new Term(FIELD_NAME, "parent"))));
+
+ BooleanQuery booleanQuery = new BooleanQuery();
+ booleanQuery.add(new ToChildBlockJoinQuery(new TermQuery(
+ new Term(FIELD_NAME, "parent")), parentFilter, false), Occur.MUST);
+ booleanQuery.add(new TermQuery(new Term(FIELD_NAME, "child")), Occur.MUST);
+ query = booleanQuery;
+
+ searcher = newSearcher(reader);
+ hits = searcher.search(query, 100);
+ int maxNumFragmentsRequired = 2;
+
+ QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+ Highlighter highlighter = new Highlighter(this, scorer);
+
+ for (int i = 0; i < hits.totalHits; i++) {
+ String text = "parent document";
+ TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+
+ highlighter.setTextFragmenter(new SimpleFragmenter(40));
+ highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
+ }
+
+ assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+ numHighlights == 1);
+ }
public void testSimpleQueryScorerPhraseHighlighting2() throws Exception {
PhraseQuery phraseQuery = new PhraseQuery();
@@ -1900,6 +1962,10 @@ public class HighlighterTest extends Bas
doc.add(new StoredField(NUMERIC_FIELD_NAME, 7));
writer.addDocument(doc, analyzer);
+ Document childDoc = doc(FIELD_NAME, "child document");
+ Document parentDoc = doc(FIELD_NAME, "parent document");
+ writer.addDocuments(Arrays.asList(childDoc, parentDoc));
+
writer.forceMerge(1);
writer.close();
reader = DirectoryReader.open(ramDir);
Modified: lucene/dev/branches/lucene6005/lucene/ivy-versions.properties
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/ivy-versions.properties?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/ivy-versions.properties (original)
+++ lucene/dev/branches/lucene6005/lucene/ivy-versions.properties Mon Nov 17 08:47:34 2014
@@ -55,7 +55,7 @@ com.sun.jersey.version = 1.9
/com.uwyn/jhighlight = 1.0
/commons-beanutils/commons-beanutils = 1.8.3
/commons-cli/commons-cli = 1.2
-/commons-codec/commons-codec = 1.9
+/commons-codec/commons-codec = 1.10
/commons-collections/commons-collections = 3.2.1
/commons-configuration/commons-configuration = 1.6
/commons-digester/commons-digester = 2.1
Modified: lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java (original)
+++ lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java Mon Nov 17 08:47:34 2014
@@ -90,6 +90,11 @@ public class ToChildBlockJoinQuery exten
return new ToChildBlockJoinWeight(this, parentQuery.createWeight(searcher), parentsFilter, doScores);
}
+ /** Return our parent query. */
+ public Query getParentQuery() {
+ return parentQuery;
+ }
+
private static class ToChildBlockJoinWeight extends Weight {
private final Query joinQuery;
private final Weight parentWeight;
Modified: lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (original)
+++ lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java Mon Nov 17 08:47:34 2014
@@ -122,6 +122,11 @@ public class ToParentBlockJoinQuery exte
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new BlockJoinWeight(this, childQuery.createWeight(searcher), parentsFilter, scoreMode);
}
+
+ /** Return our child query. */
+ public Query getChildQuery() {
+ return childQuery;
+ }
private static class BlockJoinWeight extends Weight {
private final Query joinQuery;
Modified: lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (original)
+++ lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java Mon Nov 17 08:47:34 2014
@@ -22,8 +22,10 @@ import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Path;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
@@ -70,8 +72,8 @@ import org.apache.lucene.search.TermQuer
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.suggest.InputIterator;
-import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
@@ -390,7 +392,22 @@ public class AnalyzingInfixSuggester ext
/** Lookup, without any context. */
public List<LookupResult> lookup(CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
- return lookup(key, null, num, allTermsRequired, doHighlight);
+ return lookup(key, (Map<BytesRef, BooleanClause.Occur>)null, num, allTermsRequired, doHighlight);
+ }
+
+ /** Lookup, with context but without booleans. Context booleans default to SHOULD,
+ * so each suggestion must have at least one of the contexts. */
+ public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+
+ if (contexts == null) {
+ return lookup(key, num, allTermsRequired, doHighlight);
+ }
+
+ Map<BytesRef, BooleanClause.Occur> contextInfo = new HashMap<>();
+ for (BytesRef context : contexts) {
+ contextInfo.put(context, BooleanClause.Occur.SHOULD);
+ }
+ return lookup(key, contextInfo, num, allTermsRequired, doHighlight);
}
/** This is called if the last token isn't ended
@@ -408,7 +425,7 @@ public class AnalyzingInfixSuggester ext
/** Retrieve suggestions, specifying whether all terms
* must match ({@code allTermsRequired}) and whether the hits
* should be highlighted ({@code doHighlight}). */
- public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+ public List<LookupResult> lookup(CharSequence key, Map<BytesRef, BooleanClause.Occur> contextInfo, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
if (searcherMgr == null) {
throw new IllegalStateException("suggester was not built");
@@ -469,21 +486,35 @@ public class AnalyzingInfixSuggester ext
}
}
- if (contexts != null) {
- BooleanQuery sub = new BooleanQuery();
- query.add(sub, BooleanClause.Occur.MUST);
- for(BytesRef context : contexts) {
- // NOTE: we "should" wrap this in
- // ConstantScoreQuery, or maybe send this as a
- // Filter instead to search, but since all of
- // these are MUST'd, the change to the score won't
- // affect the overall ranking. Since we indexed
- // as DOCS_ONLY, the perf should be the same
- // either way (no freq int[] blocks to decode):
-
- // TODO: if we had a BinaryTermField we could fix
- // this "must be valid ut8f" limitation:
- sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context.utf8ToString())), BooleanClause.Occur.SHOULD);
+ if (contextInfo != null) {
+
+ boolean allMustNot = true;
+ for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
+ if (entry.getValue() != BooleanClause.Occur.MUST_NOT) {
+ allMustNot = false;
+ break;
+ }
+ }
+
+ // do not make a subquery if all context booleans are MUST_NOT
+ if (allMustNot == true) {
+ for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
+ query.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey().utf8ToString())), BooleanClause.Occur.MUST_NOT);
+ }
+
+ } else {
+ BooleanQuery sub = new BooleanQuery();
+ query.add(sub, BooleanClause.Occur.MUST);
+
+ for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
+ // NOTE: we "should" wrap this in
+ // ConstantScoreQuery, or maybe send this as a
+ // Filter instead to search.
+
+ // TODO: if we had a BinaryTermField we could fix
+ // this "must be valid UTF-8" limitation:
+ sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey().utf8ToString())), entry.getValue());
+ }
}
}
}
@@ -572,8 +603,7 @@ public class AnalyzingInfixSuggester ext
LookupResult result;
if (doHighlight) {
- Object highlightKey = highlight(text, matchedTokens, prefixToken);
- result = new LookupResult(highlightKey.toString(), highlightKey, score, payload, contexts);
+ result = new LookupResult(text, highlight(text, matchedTokens, prefixToken), score, payload, contexts);
} else {
result = new LookupResult(text, score, payload, contexts);
}
@@ -664,12 +694,14 @@ public class AnalyzingInfixSuggester ext
protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) {
// TODO: apps can try to invert their analysis logic
// here, e.g. downcase the two before checking prefix:
+ if (prefixToken.length() >= surface.length()) {
+ addWholeMatch(sb, surface, analyzed);
+ return;
+ }
sb.append("<b>");
sb.append(surface.substring(0, prefixToken.length()));
sb.append("</b>");
- if (prefixToken.length() < surface.length()) {
- sb.append(surface.substring(prefixToken.length()));
- }
+ sb.append(surface.substring(prefixToken.length()));
}
@Override
Modified: lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java (original)
+++ lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java Mon Nov 17 08:47:34 2014
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
@@ -33,6 +34,7 @@ import org.apache.lucene.index.IndexOpti
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopFieldDocs;
@@ -147,6 +149,12 @@ public class BlendedInfixSuggester exten
}
@Override
+ public List<Lookup.LookupResult> lookup(CharSequence key, Map<BytesRef, BooleanClause.Occur> contextInfo, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+ // here we multiply the number of searched elements by the defined factor
+ return super.lookup(key, contextInfo, num * numFactor, allTermsRequired, doHighlight);
+ }
+
+ @Override
protected FieldType getTextFieldType() {
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
@@ -199,8 +207,7 @@ public class BlendedInfixSuggester exten
LookupResult result;
if (doHighlight) {
- Object highlightKey = highlight(text, matchedTokens, prefixToken);
- result = new LookupResult(highlightKey.toString(), highlightKey, score, payload);
+ result = new LookupResult(text, highlight(text, matchedTokens, prefixToken), score, payload);
} else {
result = new LookupResult(text, score, payload);
}