You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/03/11 20:03:46 UTC
svn commit: r1576473 [1/2] - in /lucene/dev/branches/lucene5487: ./ lucene/
lucene/analysis/ lucene/analysis/common/
lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/
lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspel...
Author: mikemccand
Date: Tue Mar 11 19:03:45 2014
New Revision: 1576473
URL: http://svn.apache.org/r1576473
Log:
LUCENE-5487: merge trunk
Added:
lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestEscaped.java
- copied unchanged from r1576274, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestEscaped.java
lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestOptionalCondition.java
- copied unchanged from r1576274, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestOptionalCondition.java
lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed-before-set.aff
- copied unchanged from r1576274, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed-before-set.aff
lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed-empty-alias.aff
- copied unchanged from r1576274, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed-empty-alias.aff
lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/escaped.aff
- copied unchanged from r1576274, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/escaped.aff
lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/escaped.dic
- copied unchanged from r1576274, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/escaped.dic
lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/optional-condition.aff
- copied unchanged from r1576274, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/optional-condition.aff
lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkOutOfOrderScorer.java (with props)
lucene/dev/branches/lucene5487/solr/solrj/src/test-files/solrj/javabin_backcompat.bin
- copied unchanged from r1576274, lucene/dev/trunk/solr/solrj/src/test-files/solrj/javabin_backcompat.bin
Modified:
lucene/dev/branches/lucene5487/ (props changed)
lucene/dev/branches/lucene5487/build.xml
lucene/dev/branches/lucene5487/extra-targets.xml
lucene/dev/branches/lucene5487/lucene/ (props changed)
lucene/dev/branches/lucene5487/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/lucene5487/lucene/analysis/ (props changed)
lucene/dev/branches/lucene5487/lucene/analysis/common/ (props changed)
lucene/dev/branches/lucene5487/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/StemmerTestBase.java
lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java
lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java
lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/broken.aff
lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed.aff
lucene/dev/branches/lucene5487/lucene/core/ (props changed)
lucene/dev/branches/lucene5487/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java
lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java
lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java
lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java
lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java
lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOutOfFileDescriptors.java
lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java
lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestNorms.java
lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java
lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/search/TestSameScoresWithThreads.java
lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java
lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
lucene/dev/branches/lucene5487/lucene/grouping/ (props changed)
lucene/dev/branches/lucene5487/lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupHeadsCollector.java
lucene/dev/branches/lucene5487/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java
lucene/dev/branches/lucene5487/lucene/memory/ (props changed)
lucene/dev/branches/lucene5487/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
lucene/dev/branches/lucene5487/lucene/queries/ (props changed)
lucene/dev/branches/lucene5487/lucene/queries/src/java/org/apache/lucene/queries/TermsFilter.java
lucene/dev/branches/lucene5487/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java
lucene/dev/branches/lucene5487/lucene/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java
lucene/dev/branches/lucene5487/lucene/test-framework/ (props changed)
lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkScorer.java
lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java
lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
lucene/dev/branches/lucene5487/solr/ (props changed)
lucene/dev/branches/lucene5487/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/lucene5487/solr/core/ (props changed)
lucene/dev/branches/lucene5487/solr/core/src/java/org/apache/solr/cloud/Overseer.java
lucene/dev/branches/lucene5487/solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java
lucene/dev/branches/lucene5487/solr/core/src/test/org/apache/solr/update/DocumentBuilderTest.java
lucene/dev/branches/lucene5487/solr/solrj/ (props changed)
lucene/dev/branches/lucene5487/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrServer.java
lucene/dev/branches/lucene5487/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java
lucene/dev/branches/lucene5487/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java
lucene/dev/branches/lucene5487/solr/solrj/src/java/org/apache/solr/common/SolrInputField.java
lucene/dev/branches/lucene5487/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
lucene/dev/branches/lucene5487/solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java
Modified: lucene/dev/branches/lucene5487/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/build.xml?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/build.xml (original)
+++ lucene/dev/branches/lucene5487/build.xml Tue Mar 11 19:03:45 2014
@@ -18,6 +18,12 @@
-->
<project name="lucene-solr" default="-projecthelp" basedir=".">
+ <!-- Look for property definition in various *build.properties files -->
+ <property file="${user.home}/lucene.build.properties"/>
+ <property file="${user.home}/build.properties"/>
+ <property file="${basedir}/build.properties"/>
+ <property file="lucene/build.properties"/><!-- hack for Lucene users, clones Lucene's common-build.xml -->
+
<target name="-projecthelp">
<java fork="false" classname="org.apache.tools.ant.Main" taskname="-">
<arg value="-projecthelp"/>
@@ -268,10 +274,6 @@
</target>
<target name="idea" depends="resolve" description="Setup IntelliJ IDEA configuration">
- <!-- Look for property definition for ${idea.jdk} in various *build.properties files -->
- <property file="lucene/build.properties"/> <!-- Look in the current project first -->
- <property file="${user.home}/lucene.build.properties"/>
- <property file="${user.home}/build.properties"/>
<condition property="idea.jdk.is.set">
<isset property="idea.jdk"/>
</condition>
Modified: lucene/dev/branches/lucene5487/extra-targets.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/extra-targets.xml?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/extra-targets.xml (original)
+++ lucene/dev/branches/lucene5487/extra-targets.xml Tue Mar 11 19:03:45 2014
@@ -92,7 +92,7 @@
<svn-checker failonmodifications="true"/>
</target>
- <property name="svnkit.version" value="1.7.8"/>
+ <property name="svnkit.version" value="1.8.4"/>
<macrodef xmlns:ivy="antlib:org.apache.ivy.ant" name="svn-checker">
<attribute name="failonmodifications" default="true"/> <!-- false if file modifications are allowed -->
@@ -107,8 +107,6 @@
import org.tmatesoft.svn.core.wc.*;
import org.apache.tools.ant.Project;
- def RECOMMENDED_SVNKIT_18 = '1.8.2';
-
SVNClientManager manager = SVNClientManager.newInstance();
SVNStatusClient statusClient = manager.getStatusClient();
SVNWCClient wcClient = manager.getWCClient();
@@ -124,11 +122,7 @@
def ec = ex.getErrorMessage().getErrorCode();
int code = ec.getCode();
int category = ec.getCategory();
- if (code == SVNErrorCode.WC_UNSUPPORTED_FORMAT.getCode()) {
- task.log('WARNING: Unsupported SVN working copy version! Disabling checks...', Project.MSG_WARN);
- task.log('If your working copy is on version 1.8 already, please pass -Dsvnkit.version=' + RECOMMENDED_SVNKIT_18 + ' to successfully run checks.', Project.MSG_INFO);
- return;
- } else if (code == SVNErrorCode.WC_NOT_DIRECTORY.getCode() || code == SVNErrorCode.WC_NOT_FILE.getCode()) {
+ if (code == SVNErrorCode.WC_NOT_DIRECTORY.getCode() || code == SVNErrorCode.WC_NOT_FILE.getCode()) {
task.log('WARNING: Development directory is not an SVN checkout! Disabling checks...', Project.MSG_WARN);
return;
} else if (category == SVNErrorCode.WC_CATEGORY) {
Modified: lucene/dev/branches/lucene5487/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/CHANGES.txt?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene5487/lucene/CHANGES.txt Tue Mar 11 19:03:45 2014
@@ -147,15 +147,33 @@ Bug fixes
recursive affix application are driven correctly by continuation classes in the affix file.
(Robert Muir)
+* LUCENE-5497: HunspellStemFilter properly handles escaped terms and affixes without conditions.
+ (Robert Muir)
+
+* LUCENE-5505: HunspellStemFilter ignores BOM markers in dictionaries and handles varying
+ types of whitespace in SET/FLAG commands. (Robert Muir)
+
+* LUCENE-5507: Fix HunspellStemFilter loading of dictionaries with large amounts of aliases
+ etc before the encoding declaration. (Robert Muir)
+
+* LUCENE-5502: Fixed TermsFilter.equals that could return true for different
+ filters. (Igor Motov via Adrien Grand)
+
Test Framework
* LUCENE-5449: Rename _TestUtil and _TestHelper to remove the leading _.
+* LUCENE-5501: Added random out-of-order collection testing (when the collector
+ supports it) to AssertingIndexSearcher. (Adrien Grand)
+
Build
* LUCENE-5463: RamUsageEstimator.(human)sizeOf(Object) is now a forbidden API.
(Adrien Grand, Robert Muir)
+* LUCENE-5511: "ant precommit" / "ant check-svn-working-copy" now work again
+ with any working copy format (thanks to svnkit 1.8.4). (Uwe Schindler)
+
======================= Lucene 4.7.0 =======================
New Features
@@ -188,7 +206,7 @@ New Features
AnalyzingInfixSuggester but boosts suggestions that matched tokens
with lower positions. (Remi Melisson via Mike McCandless)
-* LUCENE-4399: When sorting by String (SortField.STRING), you can now
+* LUCENE-5399: When sorting by String (SortField.STRING), you can now
specify whether missing values should be sorted first (the default),
using SortField.setMissingValue(SortField.STRING_FIRST), or last,
using SortField.setMissingValue(SortField.STRING_LAST). (Rob Muir,
Modified: lucene/dev/branches/lucene5487/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java (original)
+++ lucene/dev/branches/lucene5487/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java Tue Mar 11 19:03:45 2014
@@ -35,12 +35,16 @@ import org.apache.lucene.util.fst.Output
import org.apache.lucene.util.fst.Util;
import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
+import java.io.OutputStream;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
@@ -54,6 +58,7 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
@@ -154,21 +159,41 @@ public class Dictionary {
this.ignoreCase = ignoreCase;
this.needsInputCleaning = ignoreCase;
this.needsOutputCleaning = false; // set if we have an OCONV
- // TODO: we really need to probably buffer this on disk since so many newer dictionaries
- // (en_GB, hu_HU, etc) now have tons of AM lines (morph metadata) etc before they finally declare
- // their encoding... but for now this large buffer is a workaround
- BufferedInputStream buffered = new BufferedInputStream(affix, 65536);
- buffered.mark(65536);
- String encoding = getDictionaryEncoding(buffered);
- buffered.reset();
- CharsetDecoder decoder = getJavaEncoding(encoding);
- readAffixFile(buffered, decoder);
flagLookup.add(new BytesRef()); // no flags -> ord 0
stripLookup.add(new BytesRef()); // no strip -> ord 0
- IntSequenceOutputs o = IntSequenceOutputs.getSingleton();
- Builder<IntsRef> b = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, o);
- readDictionaryFiles(dictionaries, decoder, b);
- words = b.finish();
+
+ File aff = File.createTempFile("affix", "aff", tempDir);
+ OutputStream out = new BufferedOutputStream(new FileOutputStream(aff));
+ InputStream aff1 = null;
+ InputStream aff2 = null;
+ try {
+ // copy contents of affix stream to temp file
+ final byte [] buffer = new byte [1024 * 8];
+ int len;
+ while ((len = affix.read(buffer)) > 0) {
+ out.write(buffer, 0, len);
+ }
+ out.close();
+
+ // pass 1: get encoding
+ aff1 = new BufferedInputStream(new FileInputStream(aff));
+ String encoding = getDictionaryEncoding(aff1);
+
+ // pass 2: parse affixes
+ CharsetDecoder decoder = getJavaEncoding(encoding);
+ aff2 = new BufferedInputStream(new FileInputStream(aff));
+ readAffixFile(aff2, decoder);
+
+ // read dictionary entries
+ IntSequenceOutputs o = IntSequenceOutputs.getSingleton();
+ Builder<IntsRef> b = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, o);
+ readDictionaryFiles(dictionaries, decoder, b);
+ words = b.finish();
+ aliases = null; // no longer needed
+ } finally {
+ IOUtils.closeWhileHandlingException(out, aff1, aff2);
+ aff.delete();
+ }
}
/**
@@ -251,6 +276,10 @@ public class Dictionary {
LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder));
String line = null;
while ((line = reader.readLine()) != null) {
+ // ignore any BOM marker on first line
+ if (reader.getLineNumber() == 1 && line.startsWith("\uFEFF")) {
+ line = line.substring(1);
+ }
if (line.startsWith(ALIAS_KEY)) {
parseAlias(line);
} else if (line.startsWith(PREFIX_KEY)) {
@@ -348,8 +377,10 @@ public class Dictionary {
String line = reader.readLine();
String ruleArgs[] = line.split("\\s+");
- if (ruleArgs.length < 5) {
- throw new ParseException("The affix file contains a rule with less than five elements", reader.getLineNumber());
+ // from the manpage: PFX flag stripping prefix [condition [morphological_fields...]]
+ // condition is optional
+ if (ruleArgs.length < 4) {
+ throw new ParseException("The affix file contains a rule with less than four elements: " + line, reader.getLineNumber());
}
char flag = flagParsingStrategy.parseFlag(ruleArgs[1]);
@@ -370,7 +401,7 @@ public class Dictionary {
Arrays.sort(appendFlags);
}
- String condition = ruleArgs[4];
+ String condition = ruleArgs.length > 4 ? ruleArgs[4] : ".";
// at least the gascon affix file has this issue
if (condition.startsWith("[") && !condition.endsWith("]")) {
condition = condition + "]";
@@ -464,6 +495,9 @@ public class Dictionary {
return builder.finish();
}
+
+ /** pattern accepts optional BOM + SET + any whitespace */
+ final static Pattern ENCODING_PATTERN = Pattern.compile("^(\u00EF\u00BB\u00BF)?SET\\s+");
/**
* Parses the encoding specified in the affix file readable through the provided InputStream
@@ -473,7 +507,7 @@ public class Dictionary {
* @throws IOException Can be thrown while reading from the InputStream
* @throws ParseException Thrown if the first non-empty non-comment line read from the file does not adhere to the format {@code SET <encoding>}
*/
- private String getDictionaryEncoding(InputStream affix) throws IOException, ParseException {
+ static String getDictionaryEncoding(InputStream affix) throws IOException, ParseException {
final StringBuilder encoding = new StringBuilder();
for (;;) {
encoding.setLength(0);
@@ -496,9 +530,10 @@ public class Dictionary {
}
continue;
}
- if (encoding.length() > 4 && "SET ".equals(encoding.substring(0, 4))) {
- // cleanup the encoding string, too (whitespace)
- return encoding.substring(4).trim();
+ Matcher matcher = ENCODING_PATTERN.matcher(encoding);
+ if (matcher.find()) {
+ int last = matcher.end();
+ return encoding.substring(last).trim();
}
}
}
@@ -536,8 +571,12 @@ public class Dictionary {
* @param flagLine Line containing the flag information
* @return FlagParsingStrategy that handles parsing flags in the way specified in the FLAG definition
*/
- private FlagParsingStrategy getFlagParsingStrategy(String flagLine) {
- String flagType = flagLine.substring(5);
+ static FlagParsingStrategy getFlagParsingStrategy(String flagLine) {
+ String parts[] = flagLine.split("\\s+");
+ if (parts.length != 2) {
+ throw new IllegalArgumentException("Illegal FLAG specification: " + flagLine);
+ }
+ String flagType = parts[1];
if (NUM_FLAG_TYPE.equals(flagType)) {
return new NumFlagParsingStrategy();
@@ -550,6 +589,24 @@ public class Dictionary {
throw new IllegalArgumentException("Unknown flag type: " + flagType);
}
+ final char FLAG_SEPARATOR = 0x1f; // flag separator after escaping
+
+ String unescapeEntry(String entry) {
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < entry.length(); i++) {
+ char ch = entry.charAt(i);
+ if (ch == '\\' && i+1 < entry.length()) {
+ sb.append(entry.charAt(i+1));
+ i++;
+ } else if (ch == '/') {
+ sb.append(FLAG_SEPARATOR);
+ } else {
+ sb.append(ch);
+ }
+ }
+ return sb.toString();
+ }
+
/**
* Reads the dictionary file through the provided InputStreams, building up the words map
*
@@ -570,8 +627,9 @@ public class Dictionary {
String line = lines.readLine(); // first line is number of entries (approximately, sometimes)
while ((line = lines.readLine()) != null) {
+ line = unescapeEntry(line);
if (needsInputCleaning) {
- int flagSep = line.lastIndexOf('/');
+ int flagSep = line.lastIndexOf(FLAG_SEPARATOR);
if (flagSep == -1) {
CharSequence cleansed = cleanInput(line, sb);
writer.write(cleansed.toString().getBytes(IOUtils.CHARSET_UTF_8));
@@ -604,7 +662,7 @@ public class Dictionary {
scratch1.length = o1.length;
for (int i = scratch1.length - 1; i >= 0; i--) {
- if (scratch1.bytes[scratch1.offset + i] == '/') {
+ if (scratch1.bytes[scratch1.offset + i] == FLAG_SEPARATOR) {
scratch1.length = i;
break;
}
@@ -615,7 +673,7 @@ public class Dictionary {
scratch2.length = o2.length;
for (int i = scratch2.length - 1; i >= 0; i--) {
- if (scratch2.bytes[scratch2.offset + i] == '/') {
+ if (scratch2.bytes[scratch2.offset + i] == FLAG_SEPARATOR) {
scratch2.length = i;
break;
}
@@ -648,7 +706,7 @@ public class Dictionary {
String entry;
char wordForm[];
- int flagSep = line.lastIndexOf('/');
+ int flagSep = line.lastIndexOf(FLAG_SEPARATOR);
if (flagSep == -1) {
wordForm = NOFLAGS;
entry = line;
@@ -738,7 +796,9 @@ public class Dictionary {
final int count = Integer.parseInt(ruleArgs[1]);
aliases = new String[count];
} else {
- aliases[aliasCount++] = ruleArgs[1];
+ // an alias can map to no flags
+ String aliasValue = ruleArgs.length == 1 ? "" : ruleArgs[1];
+ aliases[aliasCount++] = aliasValue;
}
}
@@ -753,7 +813,7 @@ public class Dictionary {
/**
* Abstraction of the process of parsing flags taken from the affix and dic files
*/
- private static abstract class FlagParsingStrategy {
+ static abstract class FlagParsingStrategy {
/**
* Parses the given String into a single flag
@@ -828,6 +888,9 @@ public class Dictionary {
}
StringBuilder builder = new StringBuilder();
+ if (rawFlags.length() % 2 == 1) {
+ throw new IllegalArgumentException("Invalid flags (should be even number of characters): " + rawFlags);
+ }
for (int i = 0; i < rawFlags.length(); i+=2) {
char cookedFlag = (char) ((int) rawFlags.charAt(i) + (int) rawFlags.charAt(i + 1));
builder.append(cookedFlag);
Modified: lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/StemmerTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/StemmerTestBase.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/StemmerTestBase.java (original)
+++ lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/StemmerTestBase.java Tue Mar 11 19:03:45 2014
@@ -55,7 +55,7 @@ abstract class StemmerTestBase extends L
}
try {
- Dictionary dictionary = new Dictionary(affixStream, Arrays.asList(dictStreams), true);
+ Dictionary dictionary = new Dictionary(affixStream, Arrays.asList(dictStreams), ignoreCase);
stemmer = new Stemmer(dictionary);
} finally {
IOUtils.closeWhileHandlingException(affixStream);
Modified: lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java (original)
+++ lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java Tue Mar 11 19:03:45 2014
@@ -47,8 +47,8 @@ public class TestAllDictionaries2 extend
"afrikaans_spell_checker-20110323-fx+tb+fn+sm.xpi", "dictionaries/af-ZA.dic", "dictionaries/af-ZA.aff",
"albanisches_worterbuch-1.6.9-fx+tb+sm+fn.xpi", "dictionaries/sq.dic", "dictionaries/sq.aff",
"amharic_spell_checker-0.4-fx+fn+tb+sm.xpi", "dictionaries/am_ET.dic", "dictionaries/am_ET.aff",
-//BUG! "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff",
-//BUG! "armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff",
+ "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff",
+ "armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff",
"azerbaijani_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/az-Latn-AZ.dic", "dictionaries/az-Latn-AZ.aff",
"belarusian_classic_dictionary-0.1.2-tb+fx+sm.xpi", "dictionaries/be-classic.dic", "dictionaries/be-classic.aff",
"belarusian_dictionary-0.1.2-fx+sm+tb.xpi", "dictionaries/be.dic", "dictionaries/be.aff",
@@ -72,13 +72,13 @@ public class TestAllDictionaries2 extend
"diccionario_espanol_argentina-2.5.1-tb+fx+sm.xpi", "dictionaries/es_AR.dic", "dictionaries/es_AR.aff",
"diccionario_espanol_mexico-1.1.3-fn+tb+fx+sm.xpi", "dictionaries/es_MX.dic", "dictionaries/es_MX.aff",
"diccionario_ortografico_valenciano-2.2.0-fx+tb+fn+sm.xpi", "dictionaries/roa-ES-val.dic", "dictionaries/roa-ES-val.aff",
-//BUG! "diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff",
+ "diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic-reform.dic", "dictionaries/fr-classic-reform.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic.dic", "dictionaries/fr-classic.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-modern.dic", "dictionaries/fr-modern.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-reform.dic", "dictionaries/fr-reform.aff",
"difazier_an_drouizig-0.12-tb+sm+fx.xpi", "dictionaries/br.dic", "dictionaries/br.aff",
-//BUG! "dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff",
+ "dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff",
"dizionari_furlan-3.1-tb+fx+sm.xpi", "dictionaries/fur-IT.dic", "dictionaries/fur-IT.aff",
"dizionario_italiano-3.3.2-fx+sm+tb.xpi", "dictionaries/it_IT.dic", "dictionaries/it_IT.aff",
"eesti_keele_speller-3.2-fx+tb+sm.xpi", "dictionaries/et-EE.dic", "dictionaries/et-EE.aff",
@@ -101,10 +101,10 @@ public class TestAllDictionaries2 extend
"hausa_spelling_dictionary-0.2-tb+fx.xpi", "dictionaries/ha-GH.dic", "dictionaries/ha-GH.aff",
"hebrew_spell_checking_dictionary_from_hspell-1.2.0.1-fx+sm+tb.xpi", "dictionaries/he.dic", "dictionaries/he.aff",
"hindi_spell_checker-0.4-fx+tb+sm+sb+fn.xpi", "dictionaries/hi_IN.dic", "dictionaries/hi_IN.aff",
-//BUG! "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu_HU.dic", "dictionaries/hu_HU.aff",
-//BUG! "icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff",
+ "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu.dic", "dictionaries/hu.aff",
+//BUG: has no encoding declaration "icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff",
"kamus_pengecek_ejaan_bahasa_indonesia-1.1-fx+tb.xpi", "dictionaries/id.dic", "dictionaries/id.aff",
-//BUG! "kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff",
+ "kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff",
"kashubian_spell_checker_poland-0.9-sm+tb+fx.xpi", "dictionaries/Kaszebsczi.dic", "dictionaries/Kaszebsczi.aff",
"kiswahili_spell_checker-0.3-sb+tb+fn+fx+sm.xpi", "dictionaries/sw_TZ.dic", "dictionaries/sw_TZ.aff",
"kurdish_spell_checker-0.96-fx+tb+sm.xpi", "dictionaries/ku-TR.dic", "dictionaries/ku-TR.aff",
@@ -113,8 +113,8 @@ public class TestAllDictionaries2 extend
"lithuanian_spelling_check_dictionary-1.3-fx+tb+sm+fn.xpi", "dictionaries/lt.dic", "dictionaries/lt.aff",
"litreoir_gaelspell_do_mhozilla-4.7-tb+fx+sm+fn.xpi", "dictionaries/ga.dic", "dictionaries/ga.aff",
"litreoir_na_liongailise-0.03-fx+sm+tb.xpi", "dictionaries/ln-CD.dic", "dictionaries/ln-CD.aff",
-//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff",
-//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff",
+ "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff",
+ "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff",
"malagasy_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/mg_MG.dic", "dictionaries/mg_MG.aff",
"marathi_dictionary-9.3-sm+tb+sb+fx.xpi", "dictionaries/mr-IN.dic", "dictionaries/mr-IN.aff",
"ndebele_south_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nr-ZA.dic", "dictionaries/nr-ZA.aff",
@@ -125,8 +125,8 @@ public class TestAllDictionaries2 extend
"oriya_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/or-IN.dic", "dictionaries/or-IN.aff",
"polski_slownik_poprawnej_pisowni-1.0.20110621-fx+tb+sm.xpi", "dictionaries/pl.dic", "dictionaries/pl.aff",
"punjabi_spell_checker-0.3-fx+tb+sm+sb+fn.xpi", "dictionaries/pa-IN.dic", "dictionaries/pa-IN.aff",
-//BUG! "romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff",
-//BUG! "russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff",
+ "romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff",
+ "russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff",
"sanskrit_spell_checker-1.1-fx+tb+sm+sb+fn.xpi", "dictionaries/sa_IN.dic", "dictionaries/sa_IN.aff",
"scottish_gaelic_spell_checker-2.7-tb+fx+sm.xpi", "dictionaries/gd-GB.dic", "dictionaries/gd-GB.aff",
"serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Cyrl.dic", "dictionaries/sr-RS-Cyrl.aff",
@@ -146,22 +146,22 @@ public class TestAllDictionaries2 extend
"telugu_spell_checker-0.3-tb+fx+sm.xpi", "dictionaries/te_IN.dic", "dictionaries/te_IN.aff",
"te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi-x-Tai Tokerau.dic", "dictionaries/mi-x-Tai Tokerau.aff",
"te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi.dic", "dictionaries/mi.aff",
-//BUG! "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff",
+//BUG: broken file (hunspell refuses to load, too) "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff",
"tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ts-ZA.dic", "dictionaries/ts-ZA.aff",
"tswana_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/tn-ZA.dic", "dictionaries/tn-ZA.aff",
"turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff",
-//BUG! "turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff",
+ "turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff",
"ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi", "dictionaries/uk-UA.dic", "dictionaries/uk-UA.aff",
"united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi", "dictionaries/en-US.dic", "dictionaries/en-US.aff",
"upper_sorbian_spelling_dictionary-0.0.20060327.3-tb+fx+sm.xpi", "dictionaries/hsb.dic", "dictionaries/hsb.aff",
-//BUG! "urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff",
+ "urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff",
"uzbek_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/uz.dic", "dictionaries/uz.aff",
"valencian_catalan_dictionary-2.5.0-tb+fn+sm+fx.xpi", "dictionaries/ca-ES-valencia.dic", "dictionaries/ca-ES-valencia.aff",
"venda_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/ve-ZA.dic", "dictionaries/ve-ZA.aff",
"verificador_ortografico_para_portugues_do_brasil-2.3-3.2b1-tb+sm+fn+fx.xpi", "dictionaries/pt_BR.dic", "dictionaries/pt_BR.aff",
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauCu.dic", "dictionaries/vi-DauCu.aff",
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauMoi.dic", "dictionaries/vi-DauMoi.aff",
-//BUG! "woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff",
+ "woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff",
"xhosa_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/xh-ZA.dic", "dictionaries/xh-ZA.aff",
"xuxen-4.0.1-fx+tb+sm.xpi", "dictionaries/eu.dic", "dictionaries/eu.aff",
"yiddish_spell_checker_yivo-0.0.3-sm+fn+fx+tb.xpi", "dictionaries/yi.dic", "dictionaries/yi.aff",
@@ -196,7 +196,7 @@ public class TestAllDictionaries2 extend
}
public void testOneDictionary() throws Exception {
- String toTest = "hausa_spelling_dictionary-0.2-tb+fx.xpi";
+ String toTest = "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi";
for (int i = 0; i < tests.length; i++) {
if (tests[i].equals(toTest)) {
File f = new File(DICTIONARY_HOME, tests[i]);
@@ -210,7 +210,7 @@ public class TestAllDictionaries2 extend
try (InputStream dictionary = zip.getInputStream(dicEntry);
InputStream affix = zip.getInputStream(affEntry)) {
- new Dictionary(affix, dictionary);
+ new Dictionary(affix, dictionary);
}
}
}
Modified: lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java (original)
+++ lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java Tue Mar 11 19:03:45 2014
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.hunsp
* limitations under the License.
*/
+import java.io.ByteArrayInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -24,6 +25,7 @@ import java.text.ParseException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.fst.Builder;
@@ -77,6 +79,40 @@ public class TestDictionary extends Luce
affixStream.close();
dictStream.close();
}
+
+ // Builds a Dictionary from compressed-before-set.aff and compressed.dic and
+ // checks three lookups: the suffix "e" yields a result of length 3, the
+ // prefix "s" a result of length 1, and the word "olr" decodes to exactly
+ // one flag.
+ // NOTE(review): the resource name suggests the AF alias table comes before
+ // the SET/FLAG directives in this affix file -- confirm against the resource.
+ public void testCompressedBeforeSetDictionary() throws Exception {
+   InputStream affixStream = getClass().getResourceAsStream("compressed-before-set.aff");
+   InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
+   try {
+     Dictionary dictionary = new Dictionary(affixStream, dictStream);
+     assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
+     assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
+     IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
+     BytesRef ref = new BytesRef();
+     // Resolve the first ordinal of the word to its encoded flag bytes.
+     dictionary.flagLookup.get(ordList.ints[0], ref);
+     char[] flags = Dictionary.decodeFlags(ref);
+     assertEquals(1, flags.length);
+   } finally {
+     // Close in finally so a failed assertion or a parse error cannot leak the
+     // streams. Plain try/finally is used instead of a multi-resource
+     // try-with-resources statement.
+     IOUtils.closeWhileHandlingException(affixStream, dictStream);
+   }
+ }
+
+ // Builds a Dictionary from compressed-empty-alias.aff and compressed.dic and
+ // checks three lookups: the suffix "e" yields a result of length 3, the
+ // prefix "s" a result of length 1, and the word "olr" decodes to exactly
+ // one flag.
+ // NOTE(review): the resource name suggests the affix file contains an empty
+ // AF alias entry -- confirm against the resource.
+ public void testCompressedEmptyAliasDictionary() throws Exception {
+   InputStream affixStream = getClass().getResourceAsStream("compressed-empty-alias.aff");
+   InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
+   try {
+     Dictionary dictionary = new Dictionary(affixStream, dictStream);
+     assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
+     assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
+     IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
+     BytesRef ref = new BytesRef();
+     // Resolve the first ordinal of the word to its encoded flag bytes.
+     dictionary.flagLookup.get(ordList.ints[0], ref);
+     char[] flags = Dictionary.decodeFlags(ref);
+     assertEquals(1, flags.length);
+   } finally {
+     // Close in finally so a failed assertion or a parse error cannot leak the
+     // streams. Plain try/finally is used instead of a multi-resource
+     // try-with-resources statement.
+     IOUtils.closeWhileHandlingException(affixStream, dictStream);
+   }
+ }
// malformed rule causes ParseException
public void testInvalidData() throws Exception {
@@ -87,7 +123,7 @@ public class TestDictionary extends Luce
new Dictionary(affixStream, dictStream);
fail("didn't get expected exception");
} catch (ParseException expected) {
- assertEquals("The affix file contains a rule with less than five elements", expected.getMessage());
+ assertTrue(expected.getMessage().startsWith("The affix file contains a rule with less than four elements"));
assertEquals(24, expected.getErrorOffset());
}
@@ -178,4 +214,16 @@ public class TestDictionary extends Luce
Dictionary.applyMappings(fst, sb);
assertEquals("ghghghde", sb.toString());
}
+
+ // Every SET header below must be reported as "UTF-8" by
+ // Dictionary.getDictionaryEncoding: tab-separated, tab plus space, with a
+ // leading U+FEFF byte order mark, and with a BOM plus a CRLF line ending.
+ public void testSetWithCrazyWhitespaceAndBOMs() throws Exception {
+   final String[] headers = {
+     "SET\tUTF-8\n",
+     "SET\t UTF-8\n",
+     "\uFEFFSET\tUTF-8\n",
+     "\uFEFFSET\tUTF-8\r\n"
+   };
+   for (String header : headers) {
+     byte[] encoded = header.getBytes(IOUtils.CHARSET_UTF_8);
+     assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream(encoded)));
+   }
+ }
+
+ // A FLAG line must yield a non-null parsing strategy whether the directive
+ // and its value are separated by a tab or by a space.
+ public void testFlagWithCrazyWhitespace() throws Exception {
+   final String[] flagLines = {"FLAG\tUTF-8", "FLAG UTF-8"};
+   for (String flagLine : flagLines) {
+     assertNotNull(Dictionary.getFlagParsingStrategy(flagLine));
+   }
+ }
}
Modified: lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java (original)
+++ lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java Tue Mar 11 19:03:45 2014
@@ -31,6 +31,7 @@ import org.apache.lucene.analysis.hunspe
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.IOUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -39,9 +40,13 @@ public class TestHunspellStemFilter exte
@BeforeClass
public static void beforeClass() throws Exception {
- try (InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
- InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic")) {
+ // no multiple try-with to workaround bogus VerifyError
+ InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
+ InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
+ try {
dictionary = new Dictionary(affixStream, dictStream);
+ } finally {
+ IOUtils.closeWhileHandlingException(affixStream, dictStream);
}
}
@@ -97,9 +102,13 @@ public class TestHunspellStemFilter exte
public void testIgnoreCaseNoSideEffects() throws Exception {
final Dictionary d;
- try (InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
- InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic")) {
+ // no multiple try-with to workaround bogus VerifyError
+ InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
+ InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
+ try {
d = new Dictionary(affixStream, Collections.singletonList(dictStream), true);
+ } finally {
+ IOUtils.closeWhileHandlingException(affixStream, dictStream);
}
Analyzer a = new Analyzer() {
@Override
Modified: lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/broken.aff
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/broken.aff?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/broken.aff (original)
+++ lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/broken.aff Tue Mar 11 19:03:45 2014
@@ -19,6 +19,6 @@ SFX E 0 d o
PFX B Y 1
PFX B 0 s o
-#wrong rule (only 4 elements)
+#wrong rule (only 3 elements)
PFX A0 Y 1
-PFX A0 0 a
\ No newline at end of file
+PFX A0 0
\ No newline at end of file
Modified: lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed.aff
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed.aff?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed.aff (original)
+++ lucene/dev/branches/lucene5487/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed.aff Tue Mar 11 19:03:45 2014
@@ -1,8 +1,3 @@
-SET UTF-8
-TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
-
-FLAG long
-
AF 5
AF AA
AF BB
@@ -10,6 +5,11 @@ AF CC
AF DD
AF EE
+SET UTF-8
+TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
+
+FLAG long
+
SFX AA Y 3
SFX AA 0 e n
SFX AA 0 e t
Modified: lucene/dev/branches/lucene5487/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java (original)
+++ lucene/dev/branches/lucene5487/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java Tue Mar 11 19:03:45 2014
@@ -19,7 +19,7 @@ package org.apache.lucene.search;
import java.util.Collection;
-/** Used by {@link BulkScorers} that need to pass a {@link
+/** Used by {@link BulkScorer}s that need to pass a {@link
* Scorer} to {@link Collector#setScorer}. */
final class FakeScorer extends Scorer {
float score;
Modified: lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java (original)
+++ lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java Tue Mar 11 19:03:45 2014
@@ -26,12 +26,13 @@ import org.apache.lucene.index.AtomicRea
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Bits.MatchNoBits;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
@@ -121,8 +122,11 @@ public class TestReuseDocsEnum extends L
public void testReuseDocsEnumDifferentReader() throws IOException {
Directory dir = newDirectory();
Codec cp = TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
- newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
+ newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodec(cp));
int numdocs = atLeast(20);
createRandomIndex(numdocs, writer, random());
writer.commit();
Modified: lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Tue Mar 11 19:03:45 2014
@@ -139,8 +139,11 @@ public class TestBackwardsCompatibility
mp.setUseCompoundFile(false);
mp.setNoCFSRatio(1.0);
mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
// TODO: remove randomness
- IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
+ IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
.setMergePolicy(mp);
conf.setCodec(Codec.forName("Lucene40"));
IndexWriter writer = new IndexWriter(dir, conf);
Modified: lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java (original)
+++ lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java Tue Mar 11 19:03:45 2014
@@ -30,6 +30,7 @@ import org.apache.lucene.search.similari
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
/**
*
@@ -41,8 +42,11 @@ public class TestCustomNorms extends Luc
public void testFloatNorms() throws IOException {
Directory dir = newDirectory();
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
- new MockAnalyzer(random()));
+ analyzer);
Similarity provider = new MySimProvider();
config.setSimilarity(provider);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
Modified: lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java (original)
+++ lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java Tue Mar 11 19:03:45 2014
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Random;
-import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
@@ -63,9 +62,12 @@ public class TestDuelingCodecs extends L
long seed = random().nextLong();
// must use same seed because of random payloads, etc
- Analyzer leftAnalyzer = new MockAnalyzer(new Random(seed));
- Analyzer rightAnalyzer = new MockAnalyzer(new Random(seed));
-
+ int maxTermLength = TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH);
+ MockAnalyzer leftAnalyzer = new MockAnalyzer(new Random(seed));
+ leftAnalyzer.setMaxTokenLength(maxTermLength);
+ MockAnalyzer rightAnalyzer = new MockAnalyzer(new Random(seed));
+ rightAnalyzer.setMaxTokenLength(maxTermLength);
+
// but these can be different
// TODO: this turns this into a really big test of Multi*, is that what we want?
IndexWriterConfig leftConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, leftAnalyzer);
Modified: lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java (original)
+++ lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java Tue Mar 11 19:03:45 2014
@@ -29,6 +29,7 @@ import org.apache.lucene.store.Directory
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -64,8 +65,11 @@ public class TestFlushByRamOrCountsPolic
AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
Directory dir = newDirectory();
MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
- new MockAnalyzer(random())).setFlushPolicy(flushPolicy);
+ analyzer).setFlushPolicy(flushPolicy);
final int numDWPT = 1 + atLeast(2);
DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(
numDWPT);
Modified: lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java (original)
+++ lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java Tue Mar 11 19:03:45 2014
@@ -54,7 +54,10 @@ public class TestForceMergeForever exten
public void test() throws Exception {
final Directory d = newDirectory();
- final MyIndexWriter w = new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
+ final MyIndexWriter w = new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
// Try to make an index that requires merging:
w.getConfig().setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 11));
Modified: lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOutOfFileDescriptors.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOutOfFileDescriptors.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOutOfFileDescriptors.java (original)
+++ lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOutOfFileDescriptors.java Tue Mar 11 19:03:45 2014
@@ -51,7 +51,9 @@ public class TestIndexWriterOutOfFileDes
System.out.println("TEST: iter=" + iter);
}
try {
- IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+ IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
if (VERBOSE) {
// Do this ourselves instead of relying on LTC so
Modified: lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java (original)
+++ lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java Tue Mar 11 19:03:45 2014
@@ -548,7 +548,10 @@ public class TestIndexWriterWithThreads
final int threadCount = TestUtil.nextInt(random(), 2, 6);
final AtomicReference<IndexWriter> writerRef = new AtomicReference<IndexWriter>();
- writerRef.set(new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))));
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
+ writerRef.set(new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)));
final LineFileDocs docs = new LineFileDocs(random());
final Thread[] threads = new Thread[threadCount];
final int iters = atLeast(100);
Modified: lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestNorms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestNorms.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestNorms.java (original)
+++ lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestNorms.java Tue Mar 11 19:03:45 2014
@@ -75,7 +75,10 @@ public class TestNorms extends LuceneTes
// LUCENE-1260
public void testCustomEncoder() throws Exception {
Directory dir = newDirectory();
- IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
+ IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
config.setSimilarity(new CustomNormEncodingSimilarity());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
Document doc = new Document();
Modified: lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java (original)
+++ lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java Tue Mar 11 19:03:45 2014
@@ -46,7 +46,10 @@ public class TestRollingUpdates extends
Codec.setDefault(TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
}
- final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
+ final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
final int SIZE = atLeast(20);
int id = 0;
IndexReader r = null;
Modified: lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java (original)
+++ lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java Tue Mar 11 19:03:45 2014
@@ -44,7 +44,9 @@ public class TestTermsEnum extends Lucen
Random random = new Random(random().nextLong());
final LineFileDocs docs = new LineFileDocs(random, true);
final Directory d = newDirectory();
- final RandomIndexWriter w = new RandomIndexWriter(random(), d);
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+ final RandomIndexWriter w = new RandomIndexWriter(random(), d, analyzer);
final int numDocs = atLeast(10);
for(int docCount=0;docCount<numDocs;docCount++) {
w.addDocument(docs.nextDoc());
Modified: lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/search/TestSameScoresWithThreads.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/search/TestSameScoresWithThreads.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/search/TestSameScoresWithThreads.java (original)
+++ lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/search/TestSameScoresWithThreads.java Tue Mar 11 19:03:45 2014
@@ -24,8 +24,10 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
+import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@@ -36,13 +38,14 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.TestUtil;
public class TestSameScoresWithThreads extends LuceneTestCase {
public void test() throws Exception {
final Directory dir = newDirectory();
- final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+ final RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
LineFileDocs docs = new LineFileDocs(random());
int charsToIndex = atLeast(100000);
int charsIndexed = 0;
Modified: lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java (original)
+++ lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java Tue Mar 11 19:03:45 2014
@@ -38,14 +38,15 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.TestUtil;
public class TestNRTCachingDirectory extends LuceneTestCase {
public void testNRTAndCommit() throws Exception {
Directory dir = newDirectory();
NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
- IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+ IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf);
final LineFileDocs docs = new LineFileDocs(random(), true);
final int numDocs = TestUtil.nextInt(random(), 100, 400);
Modified: lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (original)
+++ lucene/dev/branches/lucene5487/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java Tue Mar 11 19:03:45 2014
@@ -292,7 +292,10 @@ public class TestFSTs extends LuceneTest
final LineFileDocs docs = new LineFileDocs(random(), true);
final int RUN_TIME_MSEC = atLeast(500);
- final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
+ final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
final File tempDir = TestUtil.getTempDir("fstlines");
final Directory dir = newFSDirectory(tempDir);
final IndexWriter writer = new IndexWriter(dir, conf);
Modified: lucene/dev/branches/lucene5487/lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupHeadsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupHeadsCollector.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupHeadsCollector.java (original)
+++ lucene/dev/branches/lucene5487/lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupHeadsCollector.java Tue Mar 11 19:03:45 2014
@@ -128,7 +128,7 @@ public abstract class AbstractAllGroupHe
@Override
public boolean acceptsDocsOutOfOrder() {
- return true;
+ return false;
}
/**
Modified: lucene/dev/branches/lucene5487/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java (original)
+++ lucene/dev/branches/lucene5487/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java Tue Mar 11 19:03:45 2014
@@ -380,10 +380,7 @@ public class AllGroupHeadsCollectorTest
System.out.println("\n===================================================================================");
}
- assertEquals(expectedGroupHeads.length, actualGroupHeads.length);
- for (int i = 0; i < expectedGroupHeads.length; i++) {
- assertEquals(expectedGroupHeads[i], actualGroupHeads[i]);
- }
+ assertArrayEquals(expectedGroupHeads, actualGroupHeads);
}
} finally {
QueryUtils.purgeFieldCache(r);
Modified: lucene/dev/branches/lucene5487/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (original)
+++ lucene/dev/branches/lucene5487/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java Tue Mar 11 19:03:45 2014
@@ -436,6 +436,7 @@ public class MemoryIndexTest extends Bas
for (int i = 0; i < numDocs; i++) {
Directory dir = newDirectory();
MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
+ mockAnalyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), TEST_VERSION_CURRENT, mockAnalyzer));
Document nextDoc = lineFileDocs.nextDoc();
Document doc = new Document();
Modified: lucene/dev/branches/lucene5487/lucene/queries/src/java/org/apache/lucene/queries/TermsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/queries/src/java/org/apache/lucene/queries/TermsFilter.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/queries/src/java/org/apache/lucene/queries/TermsFilter.java (original)
+++ lucene/dev/branches/lucene5487/lucene/queries/src/java/org/apache/lucene/queries/TermsFilter.java Tue Mar 11 19:03:45 2014
@@ -222,24 +222,14 @@ public final class TermsFilter extends F
}
TermsFilter test = (TermsFilter) obj;
- if (test.hashCode == hashCode && this.termsAndFields.length == test.termsAndFields.length) {
- // first check the fields before even comparing the bytes
- for (int i = 0; i < termsAndFields.length; i++) {
- TermsAndField current = termsAndFields[i];
- if (!current.equals(test.termsAndFields[i])) {
- return false;
- }
- }
- // straight byte comparison since we sort they must be identical
- int end = offsets[termsAndFields.length];
- byte[] left = this.termsBytes;
- byte[] right = test.termsBytes;
- for(int i=0;i < end;i++) {
- if (left[i] != right[i]) {
- return false;
- }
+ // first check the fields before even comparing the bytes
+ if (test.hashCode == hashCode && Arrays.equals(termsAndFields, test.termsAndFields)) {
+ int lastOffset = termsAndFields[termsAndFields.length - 1].end;
+ // compare offsets since we sort they must be identical
+ if (ArrayUtil.equals(offsets, 0, test.offsets, 0, lastOffset + 1)) {
+ // straight byte comparison since we sort they must be identical
+ return ArrayUtil.equals(termsBytes, 0, test.termsBytes, 0, offsets[lastOffset]);
}
- return true;
}
return false;
}
Modified: lucene/dev/branches/lucene5487/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java (original)
+++ lucene/dev/branches/lucene5487/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java Tue Mar 11 19:03:45 2014
@@ -17,19 +17,29 @@ package org.apache.lucene.queries;
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -45,19 +55,13 @@ import org.apache.lucene.util.PriorityQu
import org.apache.lucene.util.TestUtil;
import org.junit.Test;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Random;
-import java.util.Set;
-
public class CommonTermsQueryTest extends LuceneTestCase {
public void testBasics() throws IOException {
Directory dir = newDirectory();
- RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
@@ -186,7 +190,9 @@ public class CommonTermsQueryTest extend
public void testMinShouldMatch() throws IOException {
Directory dir = newDirectory();
- RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
@@ -344,7 +350,9 @@ public class CommonTermsQueryTest extend
@Test
public void testExtend() throws IOException {
Directory dir = newDirectory();
- RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
@@ -397,7 +405,9 @@ public class CommonTermsQueryTest extend
public void testRandomIndex() throws IOException {
Directory dir = newDirectory();
- RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
createRandomIndex(atLeast(50), w, random().nextLong());
DirectoryReader reader = w.getReader();
AtomicReader wrapper = SlowCompositeReaderWrapper.wrap(reader);
Modified: lucene/dev/branches/lucene5487/lucene/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java (original)
+++ lucene/dev/branches/lucene5487/lucene/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java Tue Mar 11 19:03:45 2014
@@ -50,7 +50,6 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.TestUtil;
public class TermsFilterTest extends LuceneTestCase {
@@ -297,7 +296,15 @@ public class TermsFilterTest extends Luc
}
}
}
-
+
+ public void testSingleFieldEquals() {
+ // Two distinct term texts whose String.hashCode() values are equal (asserted on the next line)
+ assertEquals("AaAaBB".hashCode(), "BBBBBB".hashCode());
+ TermsFilter left = termsFilter(true, new Term("id", "AaAaAa"), new Term("id", "AaAaBB"));
+ TermsFilter right = termsFilter(true, new Term("id", "AaAaAa"), new Term("id", "BBBBBB"));
+ assertFalse(left.equals(right)); // filters differing only in the colliding second term must not be equal
+ }
+
public void testNoTerms() {
List<Term> emptyTerms = Collections.emptyList();
List<BytesRef> emptyBytesRef = Collections.emptyList();
Modified: lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java (original)
+++ lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java Tue Mar 11 19:03:45 2014
@@ -1379,7 +1379,9 @@ public abstract class BasePostingsFormat
// during flush/merge
public void testInvertedWrite() throws Exception {
Directory dir = newDirectory();
- IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+ IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
// Must be concurrent because thread(s) can be merging
// while up to one thread flushes, and each of those
Added: lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkOutOfOrderScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkOutOfOrderScorer.java?rev=1576473&view=auto
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkOutOfOrderScorer.java (added)
+++ lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkOutOfOrderScorer.java Tue Mar 11 19:03:45 2014
@@ -0,0 +1,110 @@
+package org.apache.lucene.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.lang.ref.WeakReference;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Random;
+import java.util.WeakHashMap;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.util.VirtualMethod;
+
+/** A crazy {@link BulkScorer} that wraps a {@link Scorer}, buffers its hits in randomly sized
+ * batches, and passes each batch to the collector with the documents in shuffled order. */
+public class AssertingBulkOutOfOrderScorer extends BulkScorer {
+
+ final Random random; // supplies the per-call buffer size and the shuffle permutation
+ final Scorer scorer; // wrapped scorer that provides the doc IDs, scores and freqs
+
+ public AssertingBulkOutOfOrderScorer(Random random, Scorer scorer) {
+ this.random = random;
+ this.scorer = scorer;
+ }
+ // Fisher-Yates shuffle of the first `size` entries; the three arrays are permuted in lockstep.
+ private void shuffle(int[] docIDs, float[] scores, int[] freqs, int size) {
+ for (int i = size - 1; i > 0; --i) {
+ final int other = random.nextInt(i + 1); // swap partner drawn from [0, i]
+
+ final int tmpDoc = docIDs[i];
+ docIDs[i] = docIDs[other];
+ docIDs[other] = tmpDoc;
+
+ final float tmpScore = scores[i];
+ scores[i] = scores[other];
+ scores[other] = tmpScore;
+
+ final int tmpFreq = freqs[i];
+ freqs[i] = freqs[other];
+ freqs[other] = tmpFreq;
+ }
+ }
+ // Replays the first `size` buffered hits: sets doc/freq/score on the fake scorer, then collects.
+ private static void flush(int[] docIDs, float[] scores, int[] freqs, int size,
+ FakeScorer scorer, Collector collector) throws IOException {
+ for (int i = 0; i < size; ++i) {
+ scorer.doc = docIDs[i];
+ scorer.freq = freqs[i];
+ scorer.score = scores[i];
+ collector.collect(scorer.doc);
+ }
+ }
+ // Collects every doc < max in shuffled batches; returns whether the last doc seen != NO_MORE_DOCS.
+ @Override
+ public boolean score(Collector collector, int max) throws IOException {
+ if (scorer.docID() == -1) { // NOTE(review): presumably -1 means "not positioned yet" - confirm
+ scorer.nextDoc();
+ }
+
+ FakeScorer fake = new FakeScorer();
+ collector.setScorer(fake); // the collector gets the fake, whose fields flush() sets before each collect()
+
+ final int bufferSize = 1 + random.nextInt(100); // batch capacity in [1, 100]
+ final int[] docIDs = new int[bufferSize];
+ final float[] scores = new float[bufferSize];
+ final int[] freqs = new int[bufferSize];
+
+ int buffered = 0; // number of hits currently held in the buffers
+ int doc = scorer.docID();
+ while (doc < max) {
+ docIDs[buffered] = doc;
+ scores[buffered] = scorer.score();
+ freqs[buffered] = scorer.freq();
+
+ if (++buffered == bufferSize) { // buffer full: shuffle and emit it, then start a new batch
+ shuffle(docIDs, scores, freqs, buffered);
+ flush(docIDs, scores, freqs, buffered, fake, collector);
+ buffered = 0;
+ }
+ doc = scorer.nextDoc();
+ }
+ // Emit the partially filled last batch; both calls do nothing when buffered == 0.
+ shuffle(docIDs, scores, freqs, buffered);
+ flush(docIDs, scores, freqs, buffered, fake, collector);
+
+ return doc != Scorer.NO_MORE_DOCS;
+ }
+
+ @Override
+ public String toString() {
+ return "AssertingBulkOutOfOrderScorer(" + scorer + ")";
+ }
+}
Modified: lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkScorer.java?rev=1576473&r1=1576472&r2=1576473&view=diff
==============================================================================
--- lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkScorer.java (original)
+++ lucene/dev/branches/lucene5487/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkScorer.java Tue Mar 11 19:03:45 2014
@@ -34,18 +34,11 @@ public class AssertingBulkScorer extends
private static final VirtualMethod<BulkScorer> SCORE_COLLECTOR = new VirtualMethod<BulkScorer>(BulkScorer.class, "score", Collector.class);
private static final VirtualMethod<BulkScorer> SCORE_COLLECTOR_RANGE = new VirtualMethod<BulkScorer>(BulkScorer.class, "score", Collector.class, int.class);
- // we need to track scorers using a weak hash map because otherwise we
- // could loose references because of eg.
- // AssertingScorer.score(Collector) which needs to delegate to work correctly
- private static Map<BulkScorer, WeakReference<AssertingBulkScorer>> ASSERTING_INSTANCES = Collections.synchronizedMap(new WeakHashMap<BulkScorer, WeakReference<AssertingBulkScorer>>());
-
public static BulkScorer wrap(Random random, BulkScorer other) {
if (other == null || other instanceof AssertingBulkScorer) {
return other;
}
- final AssertingBulkScorer assertScorer = new AssertingBulkScorer(random, other);
- ASSERTING_INSTANCES.put(other, new WeakReference<AssertingBulkScorer>(assertScorer));
- return assertScorer;
+ return new AssertingBulkScorer(random, other);
}
public static boolean shouldWrap(BulkScorer inScorer) {
@@ -87,4 +80,5 @@ public class AssertingBulkScorer extends
public String toString() {
return "AssertingBulkScorer(" + in + ")";
}
+
}