Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/21 20:58:44 UTC
svn commit: r1534320 [5/39] - in /lucene/dev/branches/lucene4956: ./
dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/lucene/expressions/
dev-tools/idea/solr/contrib/velocity/ dev-tools/maven/
dev-tools/maven/lucene/ dev-tools/maven/lucene/expressions/ ...
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java Mon Oct 21 18:58:24 2013
@@ -34,6 +34,7 @@ public class DelimitedPayloadTokenFilter
DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
+ filter.reset();
assertTermEquals("The", filter, termAtt, payAtt, null);
assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
@@ -45,6 +46,8 @@ public class DelimitedPayloadTokenFilter
assertTermEquals("brown", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
assertTermEquals("dogs", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));
assertFalse(filter.incrementToken());
+ filter.end();
+ filter.close();
}
public void testNext() throws Exception {
@@ -53,6 +56,7 @@ public class DelimitedPayloadTokenFilter
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false),
DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
+ filter.reset();
assertTermEquals("The", filter, null);
assertTermEquals("quick", filter, "JJ".getBytes("UTF-8"));
assertTermEquals("red", filter, "JJ".getBytes("UTF-8"));
@@ -64,6 +68,8 @@ public class DelimitedPayloadTokenFilter
assertTermEquals("brown", filter, "JJ".getBytes("UTF-8"));
assertTermEquals("dogs", filter, "NN".getBytes("UTF-8"));
assertFalse(filter.incrementToken());
+ filter.end();
+ filter.close();
}
@@ -72,6 +78,7 @@ public class DelimitedPayloadTokenFilter
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new FloatEncoder());
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
+ filter.reset();
assertTermEquals("The", filter, termAtt, payAtt, null);
assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeFloat(1.0f));
assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeFloat(2.0f));
@@ -83,6 +90,8 @@ public class DelimitedPayloadTokenFilter
assertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.encodeFloat(99.3f));
assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeFloat(83.7f));
assertFalse(filter.incrementToken());
+ filter.end();
+ filter.close();
}
public void testIntEncoding() throws Exception {
@@ -90,6 +99,7 @@ public class DelimitedPayloadTokenFilter
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false), '|', new IntegerEncoder());
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
+ filter.reset();
assertTermEquals("The", filter, termAtt, payAtt, null);
assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeInt(1));
assertTermEquals("red", filter, termAtt, payAtt, PayloadHelper.encodeInt(2));
@@ -101,12 +111,13 @@ public class DelimitedPayloadTokenFilter
assertTermEquals("brown", filter, termAtt, payAtt, PayloadHelper.encodeInt(99));
assertTermEquals("dogs", filter, termAtt, payAtt, PayloadHelper.encodeInt(83));
assertFalse(filter.incrementToken());
+ filter.end();
+ filter.close();
}
void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
- stream.reset();
assertTrue(stream.incrementToken());
assertEquals(expected, termAtt.toString());
BytesRef payload = payloadAtt.getPayload();
@@ -123,7 +134,6 @@ public class DelimitedPayloadTokenFilter
void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
- stream.reset();
assertTrue(stream.incrementToken());
assertEquals(expected, termAtt.toString());
BytesRef payload = payAtt.getPayload();
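The reset()/end()/close() calls added above follow Lucene's TokenStream consumer contract: reset() before the first incrementToken(), end() once the last token has been returned, then close(). A minimal sketch of that workflow, assuming an Analyzer named analyzer is in scope and using placeholder field/text values:

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Consume a TokenStream in the reset / incrementToken / end / close order.
try (TokenStream ts = analyzer.tokenStream("field", "the quick red fox")) {
  CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
  ts.reset();                               // required before the first incrementToken()
  while (ts.incrementToken()) {
    System.out.println(termAtt.toString()); // one term per token
  }
  ts.end();                                 // records end-of-stream state (final offset)
}                                           // try-with-resources closes the stream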
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java Mon Oct 21 18:58:24 2013
@@ -34,8 +34,8 @@ public class TestPortugueseAnalyzer exte
public void testBasics() throws IOException {
Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT);
// stemming
- checkOneTermReuse(a, "quilométricas", "quilometric");
- checkOneTermReuse(a, "quilométricos", "quilometric");
+ checkOneTerm(a, "quilométricas", "quilometric");
+ checkOneTerm(a, "quilométricos", "quilometric");
// stopword
assertAnalyzesTo(a, "não", new String[] {});
}
@@ -45,8 +45,8 @@ public class TestPortugueseAnalyzer exte
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("quilométricas"), false);
Analyzer a = new PortugueseAnalyzer(TEST_VERSION_CURRENT,
PortugueseAnalyzer.getDefaultStopSet(), exclusionSet);
- checkOneTermReuse(a, "quilométricas", "quilométricas");
- checkOneTermReuse(a, "quilométricos", "quilometric");
+ checkOneTerm(a, "quilométricas", "quilométricas");
+ checkOneTerm(a, "quilométricos", "quilometric");
}
/** blast some random strings through the analyzer */
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java Mon Oct 21 18:58:24 2013
@@ -123,6 +123,6 @@ public class TestPortugueseLightStemFilt
return new TokenStreamComponents(tokenizer, new PortugueseLightStemFilter(tokenizer));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java Mon Oct 21 18:58:24 2013
@@ -97,6 +97,6 @@ public class TestPortugueseMinimalStemFi
return new TokenStreamComponents(tokenizer, new PortugueseMinimalStemFilter(tokenizer));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java Mon Oct 21 18:58:24 2013
@@ -96,6 +96,6 @@ public class TestPortugueseStemFilter ex
return new TokenStreamComponents(tokenizer, new PortugueseStemFilter(tokenizer));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java Mon Oct 21 18:58:24 2013
@@ -113,6 +113,6 @@ public class TestReverseStringFilter ext
return new TokenStreamComponents(tokenizer, new ReverseStringFilter(TEST_VERSION_CURRENT, tokenizer));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java Mon Oct 21 18:58:24 2013
@@ -34,8 +34,8 @@ public class TestRomanianAnalyzer extend
public void testBasics() throws IOException {
Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT);
// stemming
- checkOneTermReuse(a, "absenţa", "absenţ");
- checkOneTermReuse(a, "absenţi", "absenţ");
+ checkOneTerm(a, "absenţa", "absenţ");
+ checkOneTerm(a, "absenţi", "absenţ");
// stopword
assertAnalyzesTo(a, "îl", new String[] {});
}
@@ -45,8 +45,8 @@ public class TestRomanianAnalyzer extend
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("absenţa"), false);
Analyzer a = new RomanianAnalyzer(TEST_VERSION_CURRENT,
RomanianAnalyzer.getDefaultStopSet(), exclusionSet);
- checkOneTermReuse(a, "absenţa", "absenţa");
- checkOneTermReuse(a, "absenţi", "absenţ");
+ checkOneTerm(a, "absenţa", "absenţa");
+ checkOneTerm(a, "absenţi", "absenţ");
}
/** blast some random strings through the analyzer */
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java Mon Oct 21 18:58:24 2013
@@ -39,9 +39,9 @@ public class TestRussianAnalyzer extends
public void testReusableTokenStream() throws Exception {
Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT);
- assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
+ assertAnalyzesTo(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" });
- assertAnalyzesToReuse(a, "Но знание это хранилось в тайне",
+ assertAnalyzesTo(a, "Но знание это хранилось в тайне",
new String[] { "знан", "эт", "хран", "тайн" });
}
@@ -50,7 +50,7 @@ public class TestRussianAnalyzer extends
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.add("представление");
Analyzer a = new RussianAnalyzer(TEST_VERSION_CURRENT, RussianAnalyzer.getDefaultStopSet() , set);
- assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
+ assertAnalyzesTo(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" });
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java Mon Oct 21 18:58:24 2013
@@ -75,6 +75,6 @@ public class TestRussianLightStemFilter
return new TokenStreamComponents(tokenizer, new RussianLightStemFilter(tokenizer));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java Mon Oct 21 18:58:24 2013
@@ -95,17 +95,19 @@ public class ShingleAnalyzerWrapperTest
public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
PhraseQuery q = new PhraseQuery();
- TokenStream ts = analyzer.tokenStream("content", "this sentence");
- int j = -1;
+ try (TokenStream ts = analyzer.tokenStream("content", "this sentence")) {
+ int j = -1;
- PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
- CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+ PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
- ts.reset();
- while (ts.incrementToken()) {
- j += posIncrAtt.getPositionIncrement();
- String termText = termAtt.toString();
- q.add(new Term("content", termText), j);
+ ts.reset();
+ while (ts.incrementToken()) {
+ j += posIncrAtt.getPositionIncrement();
+ String termText = termAtt.toString();
+ q.add(new Term("content", termText), j);
+ }
+ ts.end();
}
ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
@@ -121,16 +123,16 @@ public class ShingleAnalyzerWrapperTest
public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
BooleanQuery q = new BooleanQuery();
- TokenStream ts = analyzer.tokenStream("content", "test sentence");
+ try (TokenStream ts = analyzer.tokenStream("content", "test sentence")) {
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
- CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
-
- ts.reset();
-
- while (ts.incrementToken()) {
- String termText = termAtt.toString();
- q.add(new TermQuery(new Term("content", termText)),
+ ts.reset();
+ while (ts.incrementToken()) {
+ String termText = termAtt.toString();
+ q.add(new TermQuery(new Term("content", termText)),
BooleanClause.Occur.SHOULD);
+ }
+ ts.end();
}
ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
@@ -140,12 +142,12 @@ public class ShingleAnalyzerWrapperTest
public void testReusableTokenStream() throws Exception {
Analyzer a = new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 2);
- assertAnalyzesToReuse(a, "please divide into shingles",
+ assertAnalyzesTo(a, "please divide into shingles",
new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
new int[] { 0, 0, 7, 7, 14, 14, 19 },
new int[] { 6, 13, 13, 18, 18, 27, 27 },
new int[] { 1, 0, 1, 0, 1, 0, 1 });
- assertAnalyzesToReuse(a, "divide me up again",
+ assertAnalyzesTo(a, "divide me up again",
new String[] { "divide", "divide me", "me", "me up", "up", "up again", "again" },
new int[] { 0, 0, 7, 7, 10, 10, 13 },
new int[] { 6, 9, 9, 12, 12, 18, 18 },
@@ -155,7 +157,7 @@ public class ShingleAnalyzerWrapperTest
public void testNonDefaultMinShingleSize() throws Exception {
ShingleAnalyzerWrapper analyzer
= new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 4);
- assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
+ assertAnalyzesTo(analyzer, "please divide this sentence into shingles",
new String[] { "please", "please divide this", "please divide this sentence",
"divide", "divide this sentence", "divide this sentence into",
"this", "this sentence into", "this sentence into shingles",
@@ -168,7 +170,7 @@ public class ShingleAnalyzerWrapperTest
analyzer = new ShingleAnalyzerWrapper(
new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 4, ShingleFilter.TOKEN_SEPARATOR, false, false);
- assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
+ assertAnalyzesTo(analyzer, "please divide this sentence into shingles",
new String[] { "please divide this", "please divide this sentence",
"divide this sentence", "divide this sentence into",
"this sentence into", "this sentence into shingles",
@@ -181,7 +183,7 @@ public class ShingleAnalyzerWrapperTest
public void testNonDefaultMinAndSameMaxShingleSize() throws Exception {
ShingleAnalyzerWrapper analyzer
= new ShingleAnalyzerWrapper(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 3);
- assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
+ assertAnalyzesTo(analyzer, "please divide this sentence into shingles",
new String[] { "please", "please divide this",
"divide", "divide this sentence",
"this", "this sentence into",
@@ -194,7 +196,7 @@ public class ShingleAnalyzerWrapperTest
analyzer = new ShingleAnalyzerWrapper(
new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), 3, 3, ShingleFilter.TOKEN_SEPARATOR, false, false);
- assertAnalyzesToReuse(analyzer, "please divide this sentence into shingles",
+ assertAnalyzesTo(analyzer, "please divide this sentence into shingles",
new String[] { "please divide this",
"divide this sentence",
"this sentence into",
@@ -210,7 +212,7 @@ public class ShingleAnalyzerWrapperTest
ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
"", true, false);
- assertAnalyzesToReuse(analyzer, "please divide into shingles",
+ assertAnalyzesTo(analyzer, "please divide into shingles",
new String[] { "please", "pleasedivide",
"divide", "divideinto",
"into", "intoshingles",
@@ -224,7 +226,7 @@ public class ShingleAnalyzerWrapperTest
ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
"", false, false);
- assertAnalyzesToReuse(analyzer, "please divide into shingles",
+ assertAnalyzesTo(analyzer, "please divide into shingles",
new String[] { "pleasedivide",
"divideinto",
"intoshingles" },
@@ -239,7 +241,7 @@ public class ShingleAnalyzerWrapperTest
ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
null, true, false);
- assertAnalyzesToReuse(analyzer, "please divide into shingles",
+ assertAnalyzesTo(analyzer, "please divide into shingles",
new String[] { "please", "pleasedivide",
"divide", "divideinto",
"into", "intoshingles",
@@ -253,7 +255,7 @@ public class ShingleAnalyzerWrapperTest
ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
"", false, false);
- assertAnalyzesToReuse(analyzer, "please divide into shingles",
+ assertAnalyzesTo(analyzer, "please divide into shingles",
new String[] { "pleasedivide",
"divideinto",
"intoshingles" },
@@ -267,7 +269,7 @@ public class ShingleAnalyzerWrapperTest
ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
"<SEP>", true, false);
- assertAnalyzesToReuse(analyzer, "please divide into shingles",
+ assertAnalyzesTo(analyzer, "please divide into shingles",
new String[] { "please", "please<SEP>divide",
"divide", "divide<SEP>into",
"into", "into<SEP>shingles",
@@ -281,7 +283,7 @@ public class ShingleAnalyzerWrapperTest
ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
"<SEP>", false, false);
- assertAnalyzesToReuse(analyzer, "please divide into shingles",
+ assertAnalyzesTo(analyzer, "please divide into shingles",
new String[] { "please<SEP>divide",
"divide<SEP>into",
"into<SEP>shingles" },
@@ -296,7 +298,7 @@ public class ShingleAnalyzerWrapperTest
ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE,
"", false, true);
- assertAnalyzesToReuse(analyzer, "please",
+ assertAnalyzesTo(analyzer, "please",
new String[] { "please" },
new int[] { 0 },
new int[] { 6 },
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java Mon Oct 21 18:58:24 2013
@@ -24,6 +24,7 @@ import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
@@ -34,41 +35,6 @@ import org.apache.lucene.analysis.tokena
public class ShingleFilterTest extends BaseTokenStreamTestCase {
- public class TestTokenStream extends TokenStream {
-
- protected int index = 0;
- protected Token[] testToken;
-
- private CharTermAttribute termAtt;
- private OffsetAttribute offsetAtt;
- private PositionIncrementAttribute posIncrAtt;
- private TypeAttribute typeAtt;
-
- public TestTokenStream(Token[] testToken) {
- super();
- this.testToken = testToken;
- this.termAtt = addAttribute(CharTermAttribute.class);
- this.offsetAtt = addAttribute(OffsetAttribute.class);
- this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- this.typeAtt = addAttribute(TypeAttribute.class);
- }
-
- @Override
- public final boolean incrementToken() {
- clearAttributes();
- if (index < testToken.length) {
- Token t = testToken[index++];
- termAtt.copyBuffer(t.buffer(), 0, t.length());
- offsetAtt.setOffset(t.startOffset(), t.endOffset());
- posIncrAtt.setPositionIncrement(t.getPositionIncrement());
- typeAtt.setType(TypeAttribute.DEFAULT_TYPE);
- return true;
- } else {
- return false;
- }
- }
- }
-
public static final Token[] TEST_TOKEN = new Token[] {
createToken("please", 0, 6),
createToken("divide", 7, 13),
@@ -1066,7 +1032,7 @@ public class ShingleFilterTest extends B
boolean outputUnigrams)
throws IOException {
- ShingleFilter filter = new ShingleFilter(new TestTokenStream(tokensToShingle), maxSize);
+ ShingleFilter filter = new ShingleFilter(new CannedTokenStream(tokensToShingle), maxSize);
filter.setOutputUnigrams(outputUnigrams);
shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
}
@@ -1076,7 +1042,7 @@ public class ShingleFilterTest extends B
String[] types, boolean outputUnigrams)
throws IOException {
ShingleFilter filter
- = new ShingleFilter(new TestTokenStream(tokensToShingle), minSize, maxSize);
+ = new ShingleFilter(new CannedTokenStream(tokensToShingle), minSize, maxSize);
filter.setOutputUnigrams(outputUnigrams);
shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
}
@@ -1087,7 +1053,7 @@ public class ShingleFilterTest extends B
boolean outputUnigramsIfNoShingles)
throws IOException {
ShingleFilter filter
- = new ShingleFilter(new TestTokenStream(tokensToShingle), minSize, maxSize);
+ = new ShingleFilter(new CannedTokenStream(tokensToShingle), minSize, maxSize);
filter.setOutputUnigrams(outputUnigrams);
filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
@@ -1098,7 +1064,7 @@ public class ShingleFilterTest extends B
String[] types, boolean outputUnigrams)
throws IOException {
ShingleFilter filter
- = new ShingleFilter(new TestTokenStream(tokensToShingle), minSize, maxSize);
+ = new ShingleFilter(new CannedTokenStream(tokensToShingle), minSize, maxSize);
filter.setTokenSeparator(tokenSeparator);
filter.setOutputUnigrams(outputUnigrams);
shingleFilterTestCommon(filter, tokensToCompare, positionIncrements, types);
@@ -1168,6 +1134,65 @@ public class ShingleFilterTest extends B
return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
+ }
+
+ public void testTrailingHole1() throws IOException {
+ // Analyzing "wizard of", where of is removed as a
+ // stopword leaving a trailing hole:
+ Token[] inputTokens = new Token[] {createToken("wizard", 0, 6)};
+ ShingleFilter filter = new ShingleFilter(new CannedTokenStream(1, 9, inputTokens), 2, 2);
+
+ assertTokenStreamContents(filter,
+ new String[] {"wizard", "wizard _"},
+ new int[] {0, 0},
+ new int[] {6, 9},
+ new int[] {1, 0},
+ 9);
+ }
+
+ public void testTrailingHole2() throws IOException {
+ // Analyzing "purple wizard of", where of is removed as a
+ // stopword leaving a trailing hole:
+ Token[] inputTokens = new Token[] {createToken("purple", 0, 6),
+ createToken("wizard", 7, 13)};
+ ShingleFilter filter = new ShingleFilter(new CannedTokenStream(1, 16, inputTokens), 2, 2);
+
+ assertTokenStreamContents(filter,
+ new String[] {"purple", "purple wizard", "wizard", "wizard _"},
+ new int[] {0, 0, 7, 7},
+ new int[] {6, 13, 13, 16},
+ new int[] {1, 0, 1, 0},
+ 16);
+ }
+
+ public void testTwoTrailingHoles() throws IOException {
+ // Analyzing "purple wizard of the", where of and the are removed as a
+ // stopwords, leaving two trailing holes:
+ Token[] inputTokens = new Token[] {createToken("purple", 0, 6),
+ createToken("wizard", 7, 13)};
+ ShingleFilter filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 2);
+
+ assertTokenStreamContents(filter,
+ new String[] {"purple", "purple wizard", "wizard", "wizard _"},
+ new int[] {0, 0, 7, 7},
+ new int[] {6, 13, 13, 20},
+ new int[] {1, 0, 1, 0},
+ 20);
+ }
+
+ public void testTwoTrailingHolesTriShingle() throws IOException {
+ // Analyzing "purple wizard of the", where of and the are removed as a
+ // stopwords, leaving two trailing holes:
+ Token[] inputTokens = new Token[] {createToken("purple", 0, 6),
+ createToken("wizard", 7, 13)};
+ ShingleFilter filter = new ShingleFilter(new CannedTokenStream(2, 20, inputTokens), 2, 3);
+
+ assertTokenStreamContents(filter,
+ new String[] {"purple", "purple wizard", "purple wizard _", "wizard", "wizard _", "wizard _ _"},
+ new int[] {0, 0, 0, 7, 7, 7},
+ new int[] {6, 13, 20, 13, 20, 20},
+ new int[] {1, 0, 0, 1, 0, 0},
+ 20);
}
}
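The hunks above replace the hand-rolled TestTokenStream with the test framework's CannedTokenStream; its three-argument constructor takes a trailing position increment and a final offset, which is how the new trailing-hole tests model stopwords removed at the end of the input. A rough sketch of the first case, assuming Lucene 4.x's Token(String, int, int) constructor:

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.shingle.ShingleFilter;

// "wizard of" with the stopword "of" removed: one surviving token, a trailing
// position increment of 1 (the hole) and a final offset of 9 (the end of "of").
Token wizard = new Token("wizard", 0, 6);
CannedTokenStream input = new CannedTokenStream(1, 9, wizard);
ShingleFilter shingles = new ShingleFilter(input, 2, 2);
// shingles now yields "wizard" and the filler-padded bigram "wizard _".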
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java Mon Oct 21 18:58:24 2013
@@ -114,7 +114,7 @@ public class TestSnowball extends BaseTo
return new TokenStreamComponents(tokenizer, new SnowballFilter(tokenizer, lang));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java Mon Oct 21 18:58:24 2013
@@ -34,8 +34,8 @@ public class TestSwedishAnalyzer extends
public void testBasics() throws IOException {
Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT);
// stemming
- checkOneTermReuse(a, "jaktkarlarne", "jaktkarl");
- checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
+ checkOneTerm(a, "jaktkarlarne", "jaktkarl");
+ checkOneTerm(a, "jaktkarlens", "jaktkarl");
// stopword
assertAnalyzesTo(a, "och", new String[] {});
}
@@ -45,8 +45,8 @@ public class TestSwedishAnalyzer extends
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("jaktkarlarne"), false);
Analyzer a = new SwedishAnalyzer(TEST_VERSION_CURRENT,
SwedishAnalyzer.getDefaultStopSet(), exclusionSet);
- checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");
- checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
+ checkOneTerm(a, "jaktkarlarne", "jaktkarlarne");
+ checkOneTerm(a, "jaktkarlens", "jaktkarl");
}
/** blast some random strings through the analyzer */
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java Mon Oct 21 18:58:24 2013
@@ -75,6 +75,6 @@ public class TestSwedishLightStemFilter
return new TokenStreamComponents(tokenizer, new SwedishLightStemFilter(tokenizer));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSolrSynonymParser.java Mon Oct 21 18:58:24 2013
@@ -44,7 +44,7 @@ public class TestSolrSynonymParser exten
"this test, that testing";
SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random()));
- parser.add(new StringReader(testFile));
+ parser.parse(new StringReader(testFile));
final SynonymMap map = parser.build();
Analyzer analyzer = new Analyzer() {
@@ -77,7 +77,7 @@ public class TestSolrSynonymParser exten
public void testInvalidDoubleMap() throws Exception {
String testFile = "a => b => c";
SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random()));
- parser.add(new StringReader(testFile));
+ parser.parse(new StringReader(testFile));
}
/** parse a syn file with bad syntax */
@@ -85,7 +85,7 @@ public class TestSolrSynonymParser exten
public void testInvalidAnalyzesToNothingOutput() throws Exception {
String testFile = "a => 1";
SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random(), MockTokenizer.SIMPLE, false));
- parser.add(new StringReader(testFile));
+ parser.parse(new StringReader(testFile));
}
/** parse a syn file with bad syntax */
@@ -93,7 +93,7 @@ public class TestSolrSynonymParser exten
public void testInvalidAnalyzesToNothingInput() throws Exception {
String testFile = "1 => a";
SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random(), MockTokenizer.SIMPLE, false));
- parser.add(new StringReader(testFile));
+ parser.parse(new StringReader(testFile));
}
/** parse a syn file with bad syntax */
@@ -101,7 +101,7 @@ public class TestSolrSynonymParser exten
public void testInvalidPositionsInput() throws Exception {
String testFile = "testola => the test";
SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer(TEST_VERSION_CURRENT));
- parser.add(new StringReader(testFile));
+ parser.parse(new StringReader(testFile));
}
/** parse a syn file with bad syntax */
@@ -109,7 +109,7 @@ public class TestSolrSynonymParser exten
public void testInvalidPositionsOutput() throws Exception {
String testFile = "the test => testola";
SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer(TEST_VERSION_CURRENT));
- parser.add(new StringReader(testFile));
+ parser.parse(new StringReader(testFile));
}
/** parse a syn file with some escaped syntax chars */
@@ -118,7 +118,7 @@ public class TestSolrSynonymParser exten
"a\\=>a => b\\=>b\n" +
"a\\,a => b\\,b";
SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
- parser.add(new StringReader(testFile));
+ parser.parse(new StringReader(testFile));
final SynonymMap map = parser.build();
Analyzer analyzer = new Analyzer() {
@Override
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilterFactory.java Mon Oct 21 18:58:24 2013
@@ -32,16 +32,33 @@ import org.apache.lucene.analysis.util.S
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
public class TestSynonymFilterFactory extends BaseTokenStreamFactoryTestCase {
- /** test that we can parse and use the solr syn file */
- public void testSynonyms() throws Exception {
+
+ /** checks for synonyms of "GB" in synonyms.txt */
+ private void checkSolrSynonyms(TokenFilterFactory factory) throws Exception {
Reader reader = new StringReader("GB");
TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- stream = tokenFilterFactory("Synonym", "synonyms", "synonyms.txt").create(stream);
+ stream = factory.create(stream);
assertTrue(stream instanceof SynonymFilter);
- assertTokenStreamContents(stream,
+ assertTokenStreamContents(stream,
new String[] { "GB", "gib", "gigabyte", "gigabytes" },
new int[] { 1, 0, 0, 0 });
}
+
+ /** checks for synonyms of "second" in synonyms-wordnet.txt */
+ private void checkWordnetSynonyms(TokenFilterFactory factory) throws Exception {
+ Reader reader = new StringReader("second");
+ TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ stream = factory.create(stream);
+ assertTrue(stream instanceof SynonymFilter);
+ assertTokenStreamContents(stream,
+ new String[] { "second", "2nd", "two" },
+ new int[] { 1, 0, 0 });
+ }
+
+ /** test that we can parse and use the solr syn file */
+ public void testSynonyms() throws Exception {
+ checkSolrSynonyms(tokenFilterFactory("Synonym", "synonyms", "synonyms.txt"));
+ }
/** if the synonyms are completely empty, test that we still analyze correctly */
public void testEmptySynonyms() throws Exception {
@@ -52,6 +69,14 @@ public class TestSynonymFilterFactory ex
"synonyms", "synonyms.txt").create(stream);
assertTokenStreamContents(stream, new String[] { "GB" });
}
+
+ public void testFormat() throws Exception {
+ checkSolrSynonyms(tokenFilterFactory("Synonym", "synonyms", "synonyms.txt", "format", "solr"));
+ checkWordnetSynonyms(tokenFilterFactory("Synonym", "synonyms", "synonyms-wordnet.txt", "format", "wordnet"));
+ // explicit class should work the same as the "solr" alias
+ checkSolrSynonyms(tokenFilterFactory("Synonym", "synonyms", "synonyms.txt",
+ "format", SolrSynonymParser.class.getName()));
+ }
/** Test that bogus arguments result in exception */
public void testBogusArguments() throws Exception {
@@ -133,6 +158,8 @@ public class TestSynonymFilterFactory ex
// :NOOP:
}
}
+
+
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java Mon Oct 21 18:58:24 2013
@@ -624,7 +624,7 @@ public class TestSynonymMapFilter extend
"bbb => bbbb1 bbbb2\n";
SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random()));
- parser.add(new StringReader(testFile));
+ parser.parse(new StringReader(testFile));
final SynonymMap map = parser.build();
Analyzer analyzer = new Analyzer() {
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestWordnetSynonymParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestWordnetSynonymParser.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestWordnetSynonymParser.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestWordnetSynonymParser.java Mon Oct 21 18:58:24 2013
@@ -27,7 +27,6 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.Tokenizer;
public class TestWordnetSynonymParser extends BaseTokenStreamTestCase {
- Analyzer analyzer;
String synonymsFile =
"s(100000001,1,'woods',n,1,0).\n" +
@@ -42,7 +41,7 @@ public class TestWordnetSynonymParser ex
public void testSynonyms() throws Exception {
WordnetSynonymParser parser = new WordnetSynonymParser(true, true, new MockAnalyzer(random()));
- parser.add(new StringReader(synonymsFile));
+ parser.parse(new StringReader(synonymsFile));
final SynonymMap map = parser.build();
Analyzer analyzer = new Analyzer() {
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java Mon Oct 21 18:58:24 2013
@@ -92,14 +92,14 @@ public class TestThaiAnalyzer extends Ba
public void testReusableTokenStream() throws Exception {
ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET);
- assertAnalyzesToReuse(analyzer, "", new String[] {});
+ assertAnalyzesTo(analyzer, "", new String[] {});
- assertAnalyzesToReuse(
+ assertAnalyzesTo(
analyzer,
"à¸à¸²à¸£à¸à¸µà¹à¹à¸à¹à¸à¹à¸à¸à¹à¸ªà¸à¸à¸§à¹à¸²à¸à¸²à¸à¸à¸µ",
new String[] { "à¸à¸²à¸£", "à¸à¸µà¹", "à¹à¸à¹", "à¸à¹à¸à¸", "à¹à¸ªà¸à¸", "วà¹à¸²", "à¸à¸²à¸", "à¸à¸µ"});
- assertAnalyzesToReuse(
+ assertAnalyzesTo(
analyzer,
"à¸à¸£à¸´à¸©à¸±à¸à¸à¸·à¹à¸ XY&Z - à¸à¸¸à¸¢à¸à¸±à¸ xyz@demo.com",
new String[] { "à¸à¸£à¸´à¸©à¸±à¸", "à¸à¸·à¹à¸", "xy", "z", "à¸à¸¸à¸¢", "à¸à¸±à¸", "xyz", "demo.com" });
@@ -136,6 +136,6 @@ public class TestThaiAnalyzer extends Ba
return new TokenStreamComponents(tokenizer, new ThaiWordFilter(TEST_VERSION_CURRENT, tokenizer));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java Mon Oct 21 18:58:24 2013
@@ -34,8 +34,8 @@ public class TestTurkishAnalyzer extends
public void testBasics() throws IOException {
Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT);
// stemming
- checkOneTermReuse(a, "ağacı", "ağaç");
- checkOneTermReuse(a, "ağaç", "ağaç");
+ checkOneTerm(a, "ağacı", "ağaç");
+ checkOneTerm(a, "ağaç", "ağaç");
// stopword
assertAnalyzesTo(a, "dolayı", new String[] {});
}
@@ -45,8 +45,8 @@ public class TestTurkishAnalyzer extends
CharArraySet exclusionSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("ağacı"), false);
Analyzer a = new TurkishAnalyzer(TEST_VERSION_CURRENT,
TurkishAnalyzer.getDefaultStopSet(), exclusionSet);
- checkOneTermReuse(a, "ağacı", "ağacı");
- checkOneTermReuse(a, "ağaç", "ağaç");
+ checkOneTerm(a, "ağacı", "ağacı");
+ checkOneTerm(a, "ağaç", "ağaç");
}
/** blast some random strings through the analyzer */
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishLowerCaseFilter.java Mon Oct 21 18:58:24 2013
@@ -83,6 +83,6 @@ public class TestTurkishLowerCaseFilter
return new TokenStreamComponents(tokenizer, new TurkishLowerCaseFilter(tokenizer));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java Mon Oct 21 18:58:24 2013
@@ -123,18 +123,18 @@ public class TestCharTokenizers extends
int num = 1000 * RANDOM_MULTIPLIER;
for (int i = 0; i < num; i++) {
String s = _TestUtil.randomUnicodeString(random());
- TokenStream ts = analyzer.tokenStream("foo", s);
- ts.reset();
- OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
- while (ts.incrementToken()) {
- String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset());
- for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) {
- cp = highlightedText.codePointAt(j);
- assertTrue("non-letter:" + Integer.toHexString(cp), Character.isLetter(cp));
+ try (TokenStream ts = analyzer.tokenStream("foo", s)) {
+ ts.reset();
+ OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
+ while (ts.incrementToken()) {
+ String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset());
+ for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) {
+ cp = highlightedText.codePointAt(j);
+ assertTrue("non-letter:" + Integer.toHexString(cp), Character.isLetter(cp));
+ }
}
+ ts.end();
}
- ts.end();
- ts.close();
}
// just for fun
checkRandomData(random(), analyzer, num);
@@ -161,18 +161,18 @@ public class TestCharTokenizers extends
int num = 1000 * RANDOM_MULTIPLIER;
for (int i = 0; i < num; i++) {
String s = _TestUtil.randomUnicodeString(random());
- TokenStream ts = analyzer.tokenStream("foo", s);
- ts.reset();
- OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
- while (ts.incrementToken()) {
- String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset());
- for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) {
- cp = highlightedText.codePointAt(j);
- assertTrue("non-letter:" + Integer.toHexString(cp), Character.isLetter(cp));
+ try (TokenStream ts = analyzer.tokenStream("foo", s)) {
+ ts.reset();
+ OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
+ while (ts.incrementToken()) {
+ String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset());
+ for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) {
+ cp = highlightedText.codePointAt(j);
+ assertTrue("non-letter:" + Integer.toHexString(cp), Character.isLetter(cp));
+ }
}
+ ts.end();
}
- ts.end();
- ts.close();
}
// just for fun
checkRandomData(random(), analyzer, num);
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java Mon Oct 21 18:58:24 2013
@@ -69,7 +69,7 @@ public class TestElision extends BaseTok
return new TokenStreamComponents(tokenizer, new ElisionFilter(tokenizer, FrenchAnalyzer.DEFAULT_ARTICLES));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/icu/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/icu/ivy.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/icu/ivy.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/icu/ivy.xml Mon Oct 21 18:58:24 2013
@@ -19,7 +19,7 @@
<ivy-module version="2.0">
<info organisation="org.apache.lucene" module="analyzers-icu"/>
<dependencies>
- <dependency org="com.ibm.icu" name="icu4j" rev="49.1" transitive="false"/>
+ <dependency org="com.ibm.icu" name="icu4j" rev="${/com.ibm.icu/icu4j}" transitive="false"/>
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
</ivy-module>
Modified: lucene/dev/branches/lucene4956/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java Mon Oct 21 18:58:24 2013
@@ -45,8 +45,7 @@ public final class ICUTokenizer extends
/** true length of text in the buffer */
private int length = 0;
/** length in buffer that can be evaluated safely, up to a safe end point */
- // note: usableLength is -1 here to best-effort AIOOBE consumers that don't call reset()
- private int usableLength = -1;
+ private int usableLength = 0;
/** accumulated offset of previous buffers for this reader, for offsetAtt */
private int offset = 0;
@@ -120,7 +119,8 @@ public final class ICUTokenizer extends
}
@Override
- public void end() {
+ public void end() throws IOException {
+ super.end();
final int finalOffset = (length < 0) ? offset : offset + length;
offsetAtt.setOffset(correctOffset(finalOffset), correctOffset(finalOffset));
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java Mon Oct 21 18:58:24 2013
@@ -87,6 +87,6 @@ public class TestICUFoldingFilter extend
return new TokenStreamComponents(tokenizer, new ICUFoldingFilter(tokenizer));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java Mon Oct 21 18:58:24 2013
@@ -87,6 +87,6 @@ public class TestICUNormalizer2Filter ex
return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(tokenizer));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUTransformFilter.java Mon Oct 21 18:58:24 2013
@@ -109,6 +109,6 @@ public class TestICUTransformFilter exte
return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, Transliterator.getInstance("Any-Latin")));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java Mon Oct 21 18:58:24 2013
@@ -207,7 +207,7 @@ public class TestICUTokenizer extends Ba
}
public void testReusableTokenStream() throws Exception {
- assertAnalyzesToReuse(a, "སྣོན་མཛོད་དང་ལས་འདིས་བོད་ཡིག་མི་ཉམས་གོང་འཕེལ་དུ་གཏོང་བར་ཧ་ཅང་དགེ་མཚན་མཆིས་སོ། །",
+ assertAnalyzesTo(a, "སྣོན་མཛོད་དང་ལས་འདིས་བོད་ཡིག་མི་ཉམས་གོང་འཕེལ་དུ་གཏོང་བར་ཧ་ཅང་དགེ་མཚན་མཆིས་སོ། །",
new String[] { "སྣོན", "མཛོད", "དང", "ལས", "འདིས", "བོད", "ཡིག", "མི", "ཉམས", "གོང",
"འཕེལ", "དུ", "གཏོང", "བར", "ཧ", "ཅང", "དགེ", "མཚན", "མཆིས", "སོ" });
}
@@ -249,16 +249,16 @@ public class TestICUTokenizer extends Ba
}
public void testTokenAttributes() throws Exception {
- TokenStream ts = a.tokenStream("dummy", "This is a test");
- ScriptAttribute scriptAtt = ts.addAttribute(ScriptAttribute.class);
- ts.reset();
- while (ts.incrementToken()) {
- assertEquals(UScript.LATIN, scriptAtt.getCode());
- assertEquals(UScript.getName(UScript.LATIN), scriptAtt.getName());
- assertEquals(UScript.getShortName(UScript.LATIN), scriptAtt.getShortName());
- assertTrue(ts.reflectAsString(false).contains("script=Latin"));
+ try (TokenStream ts = a.tokenStream("dummy", "This is a test")) {
+ ScriptAttribute scriptAtt = ts.addAttribute(ScriptAttribute.class);
+ ts.reset();
+ while (ts.incrementToken()) {
+ assertEquals(UScript.LATIN, scriptAtt.getCode());
+ assertEquals(UScript.getName(UScript.LATIN), scriptAtt.getName());
+ assertEquals(UScript.getShortName(UScript.LATIN), scriptAtt.getShortName());
+ assertTrue(ts.reflectAsString(false).contains("script=Latin"));
+ }
+ ts.end();
}
- ts.end();
- ts.close();
}
}
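Editor's note: the testTokenAttributes rewrite above switches to try-with-resources around the TokenStream. The point of the pattern, repeated throughout the kuromoji tests below, is that close() is now guaranteed even when an assertion in the middle of the loop throws, while reset() and end() remain explicit calls. A minimal sketch of the idiom (names are illustrative):

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;

// Illustrative consume loop: TokenStream is Closeable, so try-with-resources
// guarantees close() even if an assertion inside the loop throws, which is
// why the explicit ts.close() calls disappear from these tests.
final class TokenCounter {
  static long countTokens(Analyzer analyzer, String field, String text) throws IOException {
    long count = 0;
    try (TokenStream ts = analyzer.tokenStream(field, text)) {
      ts.reset();
      while (ts.incrementToken()) {
        count++;
      }
      ts.end();
    }
    return count;
  }
}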
Modified: lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java Mon Oct 21 18:58:24 2013
@@ -191,14 +191,14 @@ public class TestWithCJKBigramFilter ext
}
public void testReusableTokenStream() throws IOException {
- assertAnalyzesToReuse(analyzer, "あいうえおabcかきくけこ",
+ assertAnalyzesTo(analyzer, "あいうえおabcかきくけこ",
new String[] { "あい", "いう", "うえ", "えお", "abc", "かき", "きく", "くけ", "けこ" },
new int[] { 0, 1, 2, 3, 5, 8, 9, 10, 11 },
new int[] { 2, 3, 4, 5, 8, 10, 11, 12, 13 },
new String[] { "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<ALPHANUM>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>" },
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1});
- assertAnalyzesToReuse(analyzer, "あいうえおabんcかきくけ こ",
+ assertAnalyzesTo(analyzer, "あいうえおabんcかきくけ こ",
new String[] { "あい", "いう", "うえ", "えお", "ab", "ん", "c", "かき", "きく", "くけ", "こ" },
new int[] { 0, 1, 2, 3, 5, 7, 8, 9, 10, 11, 14 },
new int[] { 2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15 },
Modified: lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/ivy.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/ivy.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/ivy.xml Mon Oct 21 18:58:24 2013
@@ -25,10 +25,10 @@
</configurations>
<dependencies>
- <dependency org="mecab" name="mecab-ipadic" rev="2.7.0-20070801" conf="ipadic->default">
+ <dependency org="mecab" name="mecab-ipadic" rev="${/mecab/mecab-ipadic}" conf="ipadic->default">
<artifact name="ipadic" type=".tar.gz" url="http://mecab.googlecode.com/files/mecab-ipadic-2.7.0-20070801.tar.gz"/>
</dependency>
- <dependency org="mecab" name="mecab-naist-jdic" rev="0.6.3b-20111013" conf="naist->default">
+ <dependency org="mecab" name="mecab-naist-jdic" rev="${/mecab/mecab-naist-jdic}" conf="naist->default">
<artifact name="mecab-naist-jdic" type=".tar.gz" url="http://sourceforge.jp/frs/redir.php?m=iij&f=/naist-jdic/53500/mecab-naist-jdic-0.6.3b-20111013.tar.gz"/>
</dependency>
</dependencies>
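Editor's note: instead of hard-coding dependency revisions, the kuromoji ivy.xml (and the morfologik one at the end of this mail) now references ${/organisation/module} properties. The intent is that every module resolves its revisions from one shared version-properties file in the build; assuming that layout, the relevant entries would look like the sketch below (the file name is an assumption, the revisions are the ones removed in these hunks).

# lucene/ivy-versions.properties (name assumed) -- one revision per dependency,
# keyed by /organisation/module and referenced from the per-module ivy.xml files:
/mecab/mecab-ipadic = 2.7.0-20070801
/mecab/mecab-naist-jdic = 0.6.3b-20111013
/org.carrot2/morfologik-polish = 1.7.1
/org.carrot2/morfologik-fsa = 1.7.1
/org.carrot2/morfologik-stemming = 1.7.1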
Modified: lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java Mon Oct 21 18:58:24 2013
@@ -243,7 +243,7 @@ public final class JapaneseTokenizer ext
outputCompounds = false;
break;
}
- buffer.reset(null); // best effort NPE consumers that don't call reset()
+ buffer.reset(this.input);
resetState();
@@ -261,7 +261,14 @@ public final class JapaneseTokenizer ext
}
@Override
+ public void close() throws IOException {
+ super.close();
+ buffer.reset(input);
+ }
+
+ @Override
public void reset() throws IOException {
+ super.reset();
buffer.reset(input);
resetState();
}
@@ -280,7 +287,8 @@ public final class JapaneseTokenizer ext
}
@Override
- public void end() {
+ public void end() throws IOException {
+ super.end();
// Set final offset
int finalOffset = correctOffset(pos);
offsetAtt.setOffset(finalOffset, finalOffset);
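Editor's note: the JapaneseTokenizer hunks above bring the tokenizer in line with the stricter TokenStream workflow on this branch: reset(), end() and close() chain to their super implementations first, and the internal buffer is pointed at this.input rather than null. A subclass honoring that contract looks roughly like the following minimal sketch; it is an illustration, not the real JapaneseTokenizer, and the pos field is hypothetical.

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

// Minimal sketch of a Tokenizer that chains the lifecycle methods to super
// before touching its own state, mirroring the JapaneseTokenizer fix above.
public final class LifecycleSketchTokenizer extends Tokenizer {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
  private int pos;                      // hypothetical per-document state

  public LifecycleSketchTokenizer(Reader input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    clearAttributes();
    // ... read from this.input, fill termAtt/offsetAtt, advance pos ...
    return false;                       // sketch: produces no tokens
  }

  @Override
  public void reset() throws IOException {
    super.reset();                      // chain first so the base class makes the current Reader available as this.input
    pos = 0;                            // re-initialize any buffering against this.input here
  }

  @Override
  public void end() throws IOException {
    super.end();                        // required end-of-stream handling in the base class
    int finalOffset = correctOffset(pos);
    offsetAtt.setOffset(finalOffset, finalOffset);
  }

  @Override
  public void close() throws IOException {
    super.close();                      // closes the underlying Reader
    // drop references to per-document resources here
  }
}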
Modified: lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java Mon Oct 21 18:58:24 2013
@@ -53,11 +53,13 @@ public class TestExtendedMode extends Ba
int numIterations = atLeast(1000);
for (int i = 0; i < numIterations; i++) {
String s = _TestUtil.randomUnicodeString(random(), 100);
- TokenStream ts = analyzer.tokenStream("foo", s);
- CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
- ts.reset();
- while (ts.incrementToken()) {
- assertTrue(UnicodeUtil.validUTF16String(termAtt));
+ try (TokenStream ts = analyzer.tokenStream("foo", s)) {
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+ ts.reset();
+ while (ts.incrementToken()) {
+ assertTrue(UnicodeUtil.validUTF16String(termAtt));
+ }
+ ts.end();
}
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java Mon Oct 21 18:58:24 2013
@@ -75,6 +75,6 @@ public class TestJapaneseBaseFormFilter
return new TokenStreamComponents(tokenizer, new JapaneseBaseFormFilter(tokenizer));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java Mon Oct 21 18:58:24 2013
@@ -94,6 +94,6 @@ public class TestJapaneseKatakanaStemFil
return new TokenStreamComponents(tokenizer, new JapaneseKatakanaStemFilter(tokenizer));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseReadingFormFilter.java Mon Oct 21 18:58:24 2013
@@ -103,6 +103,6 @@ public class TestJapaneseReadingFormFilt
return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer));
}
};
- checkOneTermReuse(a, "", "");
+ checkOneTerm(a, "", "");
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java Mon Oct 21 18:58:24 2013
@@ -141,13 +141,13 @@ public class TestJapaneseTokenizer exten
* ideally the test would actually fail instead of hanging...
*/
public void testDecomposition5() throws Exception {
- TokenStream ts = analyzer.tokenStream("bogus", "ãããããããããããããããããããããããããããããããããããããããã");
- ts.reset();
- while (ts.incrementToken()) {
+ try (TokenStream ts = analyzer.tokenStream("bogus", "ãããããããããããããããããããããããããããããããããããããããã")) {
+ ts.reset();
+ while (ts.incrementToken()) {
+ }
+ ts.end();
}
- ts.end();
- ts.close();
}
/*
@@ -213,9 +213,11 @@ public class TestJapaneseTokenizer exten
public void testLargeDocReliability() throws Exception {
for (int i = 0; i < 100; i++) {
String s = _TestUtil.randomUnicodeString(random(), 10000);
- TokenStream ts = analyzer.tokenStream("foo", s);
- ts.reset();
- while (ts.incrementToken()) {
+ try (TokenStream ts = analyzer.tokenStream("foo", s)) {
+ ts.reset();
+ while (ts.incrementToken()) {
+ }
+ ts.end();
}
}
}
@@ -234,27 +236,31 @@ public class TestJapaneseTokenizer exten
System.out.println("\nTEST: iter=" + i);
}
String s = _TestUtil.randomUnicodeString(random(), 100);
- TokenStream ts = analyzer.tokenStream("foo", s);
- CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
- ts.reset();
- while (ts.incrementToken()) {
- assertTrue(UnicodeUtil.validUTF16String(termAtt));
+ try (TokenStream ts = analyzer.tokenStream("foo", s)) {
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+ ts.reset();
+ while (ts.incrementToken()) {
+ assertTrue(UnicodeUtil.validUTF16String(termAtt));
+ }
+ ts.end();
}
}
}
public void testOnlyPunctuation() throws IOException {
- TokenStream ts = analyzerNoPunct.tokenStream("foo", "ãããã");
- ts.reset();
- assertFalse(ts.incrementToken());
- ts.end();
+ try (TokenStream ts = analyzerNoPunct.tokenStream("foo", "ãããã")) {
+ ts.reset();
+ assertFalse(ts.incrementToken());
+ ts.end();
+ }
}
public void testOnlyPunctuationExtended() throws IOException {
- TokenStream ts = extendedModeAnalyzerNoPunct.tokenStream("foo", "......");
- ts.reset();
- assertFalse(ts.incrementToken());
- ts.end();
+ try (TokenStream ts = extendedModeAnalyzerNoPunct.tokenStream("foo", "......")) {
+ ts.reset();
+ assertFalse(ts.incrementToken());
+ ts.end();
+ }
}
// note: test is kinda silly since kuromoji emits punctuation tokens.
@@ -365,75 +371,81 @@ public class TestJapaneseTokenizer exten
}
private void assertReadings(String input, String... readings) throws IOException {
- TokenStream ts = analyzer.tokenStream("ignored", input);
- ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class);
- ts.reset();
- for(String reading : readings) {
- assertTrue(ts.incrementToken());
- assertEquals(reading, readingAtt.getReading());
+ try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
+ ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class);
+ ts.reset();
+ for(String reading : readings) {
+ assertTrue(ts.incrementToken());
+ assertEquals(reading, readingAtt.getReading());
+ }
+ assertFalse(ts.incrementToken());
+ ts.end();
}
- assertFalse(ts.incrementToken());
- ts.end();
}
private void assertPronunciations(String input, String... pronunciations) throws IOException {
- TokenStream ts = analyzer.tokenStream("ignored", input);
- ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class);
- ts.reset();
- for(String pronunciation : pronunciations) {
- assertTrue(ts.incrementToken());
- assertEquals(pronunciation, readingAtt.getPronunciation());
+ try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
+ ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class);
+ ts.reset();
+ for(String pronunciation : pronunciations) {
+ assertTrue(ts.incrementToken());
+ assertEquals(pronunciation, readingAtt.getPronunciation());
+ }
+ assertFalse(ts.incrementToken());
+ ts.end();
}
- assertFalse(ts.incrementToken());
- ts.end();
}
private void assertBaseForms(String input, String... baseForms) throws IOException {
- TokenStream ts = analyzer.tokenStream("ignored", input);
- BaseFormAttribute baseFormAtt = ts.addAttribute(BaseFormAttribute.class);
- ts.reset();
- for(String baseForm : baseForms) {
- assertTrue(ts.incrementToken());
- assertEquals(baseForm, baseFormAtt.getBaseForm());
+ try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
+ BaseFormAttribute baseFormAtt = ts.addAttribute(BaseFormAttribute.class);
+ ts.reset();
+ for(String baseForm : baseForms) {
+ assertTrue(ts.incrementToken());
+ assertEquals(baseForm, baseFormAtt.getBaseForm());
+ }
+ assertFalse(ts.incrementToken());
+ ts.end();
}
- assertFalse(ts.incrementToken());
- ts.end();
}
private void assertInflectionTypes(String input, String... inflectionTypes) throws IOException {
- TokenStream ts = analyzer.tokenStream("ignored", input);
- InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class);
- ts.reset();
- for(String inflectionType : inflectionTypes) {
- assertTrue(ts.incrementToken());
- assertEquals(inflectionType, inflectionAtt.getInflectionType());
+ try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
+ InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class);
+ ts.reset();
+ for(String inflectionType : inflectionTypes) {
+ assertTrue(ts.incrementToken());
+ assertEquals(inflectionType, inflectionAtt.getInflectionType());
+ }
+ assertFalse(ts.incrementToken());
+ ts.end();
}
- assertFalse(ts.incrementToken());
- ts.end();
}
private void assertInflectionForms(String input, String... inflectionForms) throws IOException {
- TokenStream ts = analyzer.tokenStream("ignored", input);
- InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class);
- ts.reset();
- for(String inflectionForm : inflectionForms) {
- assertTrue(ts.incrementToken());
- assertEquals(inflectionForm, inflectionAtt.getInflectionForm());
+ try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
+ InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class);
+ ts.reset();
+ for(String inflectionForm : inflectionForms) {
+ assertTrue(ts.incrementToken());
+ assertEquals(inflectionForm, inflectionAtt.getInflectionForm());
+ }
+ assertFalse(ts.incrementToken());
+ ts.end();
}
- assertFalse(ts.incrementToken());
- ts.end();
}
private void assertPartsOfSpeech(String input, String... partsOfSpeech) throws IOException {
- TokenStream ts = analyzer.tokenStream("ignored", input);
- PartOfSpeechAttribute partOfSpeechAtt = ts.addAttribute(PartOfSpeechAttribute.class);
- ts.reset();
- for(String partOfSpeech : partsOfSpeech) {
- assertTrue(ts.incrementToken());
- assertEquals(partOfSpeech, partOfSpeechAtt.getPartOfSpeech());
+ try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
+ PartOfSpeechAttribute partOfSpeechAtt = ts.addAttribute(PartOfSpeechAttribute.class);
+ ts.reset();
+ for(String partOfSpeech : partsOfSpeech) {
+ assertTrue(ts.incrementToken());
+ assertEquals(partOfSpeech, partOfSpeechAtt.getPartOfSpeech());
+ }
+ assertFalse(ts.incrementToken());
+ ts.end();
}
- assertFalse(ts.incrementToken());
- ts.end();
}
public void testReadings() throws Exception {
@@ -627,9 +639,11 @@ public class TestJapaneseTokenizer exten
long totalStart = System.currentTimeMillis();
for (int i = 0; i < numIterations; i++) {
- final TokenStream ts = analyzer.tokenStream("ignored", line);
- ts.reset();
- while(ts.incrementToken());
+ try (TokenStream ts = analyzer.tokenStream("ignored", line)) {
+ ts.reset();
+ while(ts.incrementToken());
+ ts.end();
+ }
}
String[] sentences = line.split("、|。");
if (VERBOSE) {
@@ -639,9 +653,11 @@ public class TestJapaneseTokenizer exten
totalStart = System.currentTimeMillis();
for (int i = 0; i < numIterations; i++) {
for (String sentence: sentences) {
- final TokenStream ts = analyzer.tokenStream("ignored", sentence);
- ts.reset();
- while(ts.incrementToken());
+ try (TokenStream ts = analyzer.tokenStream("ignored", sentence)) {
+ ts.reset();
+ while(ts.incrementToken());
+ ts.end();
+ }
}
}
if (VERBOSE) {
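Editor's note: every assertReadings / assertPronunciations / assertBaseForms / assertInflection* / assertPartsOfSpeech helper rewritten in this file has the same shape — open the stream in try-with-resources, add one attribute, walk the expected values, verify the stream is exhausted, then end(). Purely as an illustration (the commit does not do this), the duplication could be folded into a single generic helper along these lines; all names below are hypothetical.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Attribute;

// Hypothetical consolidation of the per-attribute assert helpers; not part of this commit.
final class TokenAttributeAsserts {

  // Callback that pulls the value under test out of the attribute for one token.
  interface AttributeValue<A extends Attribute> {
    String get(A att);
  }

  static <A extends Attribute> void assertAttributeValues(Analyzer analyzer, String input,
      Class<A> attClass, AttributeValue<A> extractor, String... expected) throws IOException {
    try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
      A att = ts.addAttribute(attClass);
      ts.reset();
      for (String value : expected) {
        if (!ts.incrementToken()) throw new AssertionError("stream exhausted early");
        String actual = extractor.get(att);
        if (value == null ? actual != null : !value.equals(actual)) {
          throw new AssertionError("expected " + value + " but got " + actual);
        }
      }
      if (ts.incrementToken()) throw new AssertionError("stream produced extra tokens");
      ts.end();
    }
  }
}

With a helper like this, assertReadings(input, readings) would simply delegate with an extractor returning readingAtt.getReading(), and the other five helpers likewise.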
Modified: lucene/dev/branches/lucene4956/lucene/analysis/morfologik/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/morfologik/ivy.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/morfologik/ivy.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/morfologik/ivy.xml Mon Oct 21 18:58:24 2013
@@ -19,9 +19,9 @@
<ivy-module version="2.0">
<info organisation="org.apache.lucene" module="analyzers-morfologik"/>
<dependencies>
- <dependency org="org.carrot2" name="morfologik-polish" rev="1.7.1" transitive="false"/>
- <dependency org="org.carrot2" name="morfologik-fsa" rev="1.7.1" transitive="false"/>
- <dependency org="org.carrot2" name="morfologik-stemming" rev="1.7.1" transitive="false"/>
+ <dependency org="org.carrot2" name="morfologik-polish" rev="${/org.carrot2/morfologik-polish}" transitive="false"/>
+ <dependency org="org.carrot2" name="morfologik-fsa" rev="${/org.carrot2/morfologik-fsa}" transitive="false"/>
+ <dependency org="org.carrot2" name="morfologik-stemming" rev="${/org.carrot2/morfologik-stemming}" transitive="false"/>
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
</ivy-module>