You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2014/04/04 12:27:14 UTC
svn commit: r1584603 [2/12] - in /lucene/dev/branches/solr5914: ./
dev-tools/ dev-tools/idea/solr/core/src/test/ lucene/ lucene/analysis/
lucene/analysis/common/
lucene/analysis/common/src/java/org/apache/lucene/analysis/br/
lucene/analysis/common/src/...
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java Fri Apr 4 10:27:05 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.no;
import java.io.IOException;
import java.io.Reader;
+import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@@ -63,7 +64,7 @@ public final class NorwegianAnalyzer ext
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java Fri Apr 4 10:27:05 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.paylo
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import org.apache.lucene.util.BytesRef;
@@ -28,7 +29,7 @@ import org.apache.lucene.util.BytesRef;
*
**/
public class IdentityEncoder extends AbstractEncoder implements PayloadEncoder{
- protected Charset charset = Charset.forName("UTF-8");
+ protected Charset charset = StandardCharsets.UTF_8;
public IdentityEncoder() {
}
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java Fri Apr 4 10:27:05 2014
@@ -45,8 +45,8 @@ public class TypeAsPayloadTokenFilter ex
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
String type = typeAtt.type();
- if (type != null && type.equals("") == false) {
- payloadAtt.setPayload(new BytesRef(type.getBytes("UTF-8")));
+ if (type != null && !type.isEmpty()) {
+ payloadAtt.setPayload(new BytesRef(type));
}
return true;
} else {
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java Fri Apr 4 10:27:05 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.pt;
import java.io.IOException;
import java.io.Reader;
+import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@@ -62,7 +63,7 @@ public final class PortugueseAnalyzer ex
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java Fri Apr 4 10:27:05 2014
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -247,7 +248,7 @@ public abstract class RSLPStemmerBase {
// TODO: this parser is ugly, but works. use a jflex grammar instead.
try {
InputStream is = clazz.getResourceAsStream(resource);
- LineNumberReader r = new LineNumberReader(new InputStreamReader(is, "UTF-8"));
+ LineNumberReader r = new LineNumberReader(new InputStreamReader(is, StandardCharsets.UTF_8));
Map<String,Step> steps = new HashMap<>();
String step;
while ((step = readLine(r)) != null) {
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java Fri Apr 4 10:27:05 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.ru;
import java.io.IOException;
import java.io.Reader;
+import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.snowball.SnowballFilter;
@@ -53,7 +54,7 @@ public final class RussianAnalyzer exten
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java Fri Apr 4 10:27:05 2014
@@ -31,6 +31,12 @@ public class TokenRangeSinkFilter extend
private int count;
public TokenRangeSinkFilter(int lower, int upper) {
+ if (lower < 1) {
+ throw new IllegalArgumentException("lower must be greater than zero");
+ }
+ if (lower > upper) {
+ throw new IllegalArgumentException("lower must not be greater than upper");
+ }
this.lower = lower;
this.upper = upper;
}
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java Fri Apr 4 10:27:05 2014
@@ -84,6 +84,9 @@ public final class ClassicTokenizer exte
/** Set the max allowed token length. Any token longer
* than this is skipped. */
public void setMaxTokenLength(int length) {
+ if (length < 1) {
+ throw new IllegalArgumentException("maxTokenLength must be greater than zero");
+ }
this.maxTokenLength = length;
}
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Fri Apr 4 10:27:05 2014
@@ -98,6 +98,9 @@ public final class StandardTokenizer ext
/** Set the max allowed token length. Any token longer
* than this is skipped. */
public void setMaxTokenLength(int length) {
+ if (length < 1) {
+ throw new IllegalArgumentException("maxTokenLength must be greater than zero");
+ }
this.maxTokenLength = length;
}
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java Fri Apr 4 10:27:05 2014
@@ -84,6 +84,9 @@ public final class UAX29URLEmailTokenize
/** Set the max allowed token length. Any token longer
* than this is skipped. */
public void setMaxTokenLength(int length) {
+ if (length < 1) {
+ throw new IllegalArgumentException("maxTokenLength must be greater than zero");
+ }
this.maxTokenLength = length;
}
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java Fri Apr 4 10:27:05 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.sv;
import java.io.IOException;
import java.io.Reader;
+import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@@ -63,7 +64,7 @@ public final class SwedishAnalyzer exten
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class,
- DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java Fri Apr 4 10:27:05 2014
@@ -24,6 +24,7 @@ import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.HashMap;
import java.util.Iterator;
@@ -157,8 +158,8 @@ public class SynonymFilterFactory extend
/**
* Load synonyms with the given {@link SynonymMap.Parser} class.
*/
- private SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
- CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
+ protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
+ CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java Fri Apr 4 10:27:05 2014
@@ -27,6 +27,7 @@ import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -252,7 +253,7 @@ public abstract class AbstractAnalysisFa
* Returns the resource's lines (with content treated as UTF-8)
*/
protected final List<String> getLines(ResourceLoader loader, String resource) throws IOException {
- return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8);
+ return WordlistLoader.getLines(loader.openResource(resource), StandardCharsets.UTF_8);
}
/** same as {@link #getWordSet(ResourceLoader, String, boolean)},
@@ -272,7 +273,7 @@ public abstract class AbstractAnalysisFa
Reader reader = null;
try {
stream = loader.openResource(file.trim());
- CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder()
+ CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
reader = new InputStreamReader(stream, decoder);
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java Fri Apr 4 10:27:05 2014
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.util;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
+import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.IOUtils;
@@ -97,7 +98,7 @@ public abstract class StopwordAnalyzerBa
final String comment) throws IOException {
Reader reader = null;
try {
- reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), IOUtils.CHARSET_UTF_8);
+ reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), StandardCharsets.UTF_8);
return WordlistLoader.getWordSet(reader, comment, new CharArraySet(Version.LUCENE_CURRENT, 16, ignoreCase));
} finally {
IOUtils.close(reader);
@@ -122,7 +123,7 @@ public abstract class StopwordAnalyzerBa
Version matchVersion) throws IOException {
Reader reader = null;
try {
- reader = IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8);
+ reader = IOUtils.getDecodingReader(stopwords, StandardCharsets.UTF_8);
return WordlistLoader.getWordSet(reader, matchVersion);
} finally {
IOUtils.close(reader);
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory Fri Apr 4 10:27:05 2014
@@ -69,6 +69,7 @@ org.apache.lucene.analysis.miscellaneous
org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory
org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilterFactory
org.apache.lucene.analysis.miscellaneous.TrimFilterFactory
+org.apache.lucene.analysis.miscellaneous.TruncateTokenFilterFactory
org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory
org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilterFactory
org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilterFactory
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java Fri Apr 4 10:27:05 2014
@@ -23,6 +23,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
@@ -78,7 +79,7 @@ public class HTMLStripCharFilterTest ext
//Some sanity checks, but not a full-fledged check
public void testHTML() throws Exception {
InputStream stream = getClass().getResourceAsStream("htmlStripReaderTest.html");
- HTMLStripCharFilter reader = new HTMLStripCharFilter(new InputStreamReader(stream, "UTF-8"));
+ HTMLStripCharFilter reader = new HTMLStripCharFilter(new InputStreamReader(stream, StandardCharsets.UTF_8));
StringBuilder builder = new StringBuilder();
int ch = -1;
while ((ch = reader.read()) != -1){
@@ -95,7 +96,7 @@ public class HTMLStripCharFilterTest ext
public void testMSWord14GeneratedHTML() throws Exception {
InputStream stream = getClass().getResourceAsStream("MS-Word 14 generated.htm");
- HTMLStripCharFilter reader = new HTMLStripCharFilter(new InputStreamReader(stream, "UTF-8"));
+ HTMLStripCharFilter reader = new HTMLStripCharFilter(new InputStreamReader(stream, StandardCharsets.UTF_8));
String gold = "This is a test";
StringBuilder builder = new StringBuilder();
int ch = 0;
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java Fri Apr 4 10:27:05 2014
@@ -15,6 +15,7 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -269,7 +270,7 @@ public class TestUAX29URLEmailTokenizer
String luceneResourcesWikiPage;
try {
reader = new InputStreamReader(getClass().getResourceAsStream
- ("LuceneResourcesWikiPage.html"), "UTF-8");
+ ("LuceneResourcesWikiPage.html"), StandardCharsets.UTF_8);
StringBuilder builder = new StringBuilder();
char[] buffer = new char[1024];
int numCharsRead;
@@ -289,7 +290,7 @@ public class TestUAX29URLEmailTokenizer
try {
List<String> urlList = new ArrayList<>();
bufferedReader = new BufferedReader(new InputStreamReader
- (getClass().getResourceAsStream("LuceneResourcesWikiPageURLs.txt"), "UTF-8"));
+ (getClass().getResourceAsStream("LuceneResourcesWikiPageURLs.txt"), StandardCharsets.UTF_8));
String line;
while (null != (line = bufferedReader.readLine())) {
line = line.trim();
@@ -313,7 +314,7 @@ public class TestUAX29URLEmailTokenizer
String randomTextWithEmails;
try {
reader = new InputStreamReader(getClass().getResourceAsStream
- ("random.text.with.email.addresses.txt"), "UTF-8");
+ ("random.text.with.email.addresses.txt"), StandardCharsets.UTF_8);
StringBuilder builder = new StringBuilder();
char[] buffer = new char[1024];
int numCharsRead;
@@ -334,7 +335,7 @@ public class TestUAX29URLEmailTokenizer
List<String> emailList = new ArrayList<>();
bufferedReader = new BufferedReader(new InputStreamReader
(getClass().getResourceAsStream
- ("email.addresses.from.random.text.with.email.addresses.txt"), "UTF-8"));
+ ("email.addresses.from.random.text.with.email.addresses.txt"), StandardCharsets.UTF_8));
String line;
while (null != (line = bufferedReader.readLine())) {
line = line.trim();
@@ -383,7 +384,7 @@ public class TestUAX29URLEmailTokenizer
String randomTextWithURLs;
try {
reader = new InputStreamReader(getClass().getResourceAsStream
- ("random.text.with.urls.txt"), "UTF-8");
+ ("random.text.with.urls.txt"), StandardCharsets.UTF_8);
StringBuilder builder = new StringBuilder();
char[] buffer = new char[1024];
int numCharsRead;
@@ -404,7 +405,7 @@ public class TestUAX29URLEmailTokenizer
List<String> urlList = new ArrayList<>();
bufferedReader = new BufferedReader(new InputStreamReader
(getClass().getResourceAsStream
- ("urls.from.random.text.with.urls.txt"), "UTF-8"));
+ ("urls.from.random.text.with.urls.txt"), StandardCharsets.UTF_8));
String line;
while (null != (line = bufferedReader.readLine())) {
line = line.trim();
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries.java Fri Apr 4 10:27:05 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.hunsp
import java.io.File;
import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
@@ -157,7 +158,7 @@ public class TestAllDictionaries extends
File f = new File(DICTIONARY_HOME, tests[i]);
assert f.exists();
- try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) {
+ try (ZipFile zip = new ZipFile(f, StandardCharsets.UTF_8)) {
ZipEntry dicEntry = zip.getEntry(tests[i+1]);
assert dicEntry != null;
ZipEntry affEntry = zip.getEntry(tests[i+2]);
@@ -186,7 +187,7 @@ public class TestAllDictionaries extends
File f = new File(DICTIONARY_HOME, tests[i]);
assert f.exists();
- try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) {
+ try (ZipFile zip = new ZipFile(f, StandardCharsets.UTF_8)) {
ZipEntry dicEntry = zip.getEntry(tests[i+1]);
assert dicEntry != null;
ZipEntry affEntry = zip.getEntry(tests[i+2]);
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestAllDictionaries2.java Fri Apr 4 10:27:05 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.hunsp
import java.io.File;
import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
@@ -173,7 +174,7 @@ public class TestAllDictionaries2 extend
File f = new File(DICTIONARY_HOME, tests[i]);
assert f.exists();
- try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) {
+ try (ZipFile zip = new ZipFile(f, StandardCharsets.UTF_8)) {
ZipEntry dicEntry = zip.getEntry(tests[i+1]);
assert dicEntry != null;
ZipEntry affEntry = zip.getEntry(tests[i+2]);
@@ -202,7 +203,7 @@ public class TestAllDictionaries2 extend
File f = new File(DICTIONARY_HOME, tests[i]);
assert f.exists();
- try (ZipFile zip = new ZipFile(f, IOUtils.CHARSET_UTF_8)) {
+ try (ZipFile zip = new ZipFile(f, StandardCharsets.UTF_8)) {
ZipEntry dicEntry = zip.getEntry(tests[i+1]);
assert dicEntry != null;
ZipEntry affEntry = zip.getEntry(tests[i+2]);
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java Fri Apr 4 10:27:05 2014
@@ -21,6 +21,7 @@ import java.io.ByteArrayInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import org.apache.lucene.util.BytesRef;
@@ -232,10 +233,10 @@ public class TestDictionary extends Luce
}
public void testSetWithCrazyWhitespaceAndBOMs() throws Exception {
- assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\tUTF-8\n".getBytes(IOUtils.CHARSET_UTF_8))));
- assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\t UTF-8\n".getBytes(IOUtils.CHARSET_UTF_8))));
- assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\n".getBytes(IOUtils.CHARSET_UTF_8))));
- assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\r\n".getBytes(IOUtils.CHARSET_UTF_8))));
+ assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\tUTF-8\n".getBytes(StandardCharsets.UTF_8))));
+ assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\t UTF-8\n".getBytes(StandardCharsets.UTF_8))));
+ assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\n".getBytes(StandardCharsets.UTF_8))));
+ assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\r\n".getBytes(StandardCharsets.UTF_8))));
}
public void testFlagWithCrazyWhitespace() throws Exception {
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java Fri Apr 4 10:27:05 2014
@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.junit.Test;
public class TestLengthFilter extends BaseTokenStreamTestCase {
@@ -50,4 +51,11 @@ public class TestLengthFilter extends Ba
checkOneTerm(a, "", "");
}
+ /**
+ * checking the validity of constructor arguments
+ */
+ @Test(expected = IllegalArgumentException.class)
+ public void testIllegalArguments() throws Exception {
+ new LengthFilter(TEST_VERSION_CURRENT, whitespaceMockTokenizer("accept only valid arguments"), -4, -1);
+ }
}
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java Fri Apr 4 10:27:05 2014
@@ -1,11 +1,12 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@@ -31,21 +32,36 @@ public class TestLengthFilterFactory ext
TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
((Tokenizer)stream).setReader(reader);
stream = tokenFilterFactory("Length",
- "min", "4",
- "max", "10").create(stream);
+ LengthFilterFactory.MIN_KEY, "4",
+ LengthFilterFactory.MAX_KEY, "10").create(stream);
assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 2 });
}
-
+
/** Test that bogus arguments result in exception */
public void testBogusArguments() throws Exception {
try {
- tokenFilterFactory("Length",
- "min", "4",
- "max", "5",
+ tokenFilterFactory("Length",
+ LengthFilterFactory.MIN_KEY, "4",
+ LengthFilterFactory.MAX_KEY, "5",
"bogusArg", "bogusValue");
fail();
} catch (IllegalArgumentException expected) {
assertTrue(expected.getMessage().contains("Unknown parameters"));
}
}
+
+ /** Test that invalid arguments result in exception */
+ public void testInvalidArguments() throws Exception {
+ try {
+ Reader reader = new StringReader("foo foobar super-duper-trooper");
+ TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ ((Tokenizer)stream).setReader(reader);
+ tokenFilterFactory("Length",
+ LengthFilterFactory.MIN_KEY, "5",
+ LengthFilterFactory.MAX_KEY, "4").create(stream);
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("maximum length must not be greater than minimum length"));
+ }
+ }
}
\ No newline at end of file
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java Fri Apr 4 10:27:05 2014
@@ -1,11 +1,12 @@
package org.apache.lucene.analysis.miscellaneous;
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@@ -16,25 +17,28 @@ package org.apache.lucene.analysis.misce
* limitations under the License.
*/
-import java.io.Reader;
-import java.io.StringReader;
-
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+import java.io.Reader;
+import java.io.StringReader;
+
public class TestLimitTokenCountFilterFactory extends BaseTokenStreamFactoryTestCase {
public void test() throws Exception {
- Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
- MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
- tokenizer.setReader(reader);
- // LimitTokenCountFilter doesn't consume the entire stream that it wraps
- tokenizer.setEnableChecks(false);
- TokenStream stream = tokenizer;
- stream = tokenFilterFactory("LimitTokenCount",
- "maxTokenCount", "3").create(stream);
- assertTokenStreamContents(stream, new String[] { "A1", "B2", "C3" });
+ for (final boolean consumeAll : new boolean[]{true, false}) {
+ Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
+ MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ tokenizer.setReader(reader);
+ tokenizer.setEnableChecks(consumeAll);
+ TokenStream stream = tokenizer;
+ stream = tokenFilterFactory("LimitTokenCount",
+ LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY, "3",
+ LimitTokenCountFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll)
+ ).create(stream);
+ assertTokenStreamContents(stream, new String[]{"A1", "B2", "C3"});
+ }
}
public void testRequired() throws Exception {
@@ -44,15 +48,17 @@ public class TestLimitTokenCountFilterFa
fail();
} catch (IllegalArgumentException e) {
assertTrue("exception doesn't mention param: " + e.getMessage(),
- 0 < e.getMessage().indexOf(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY));
+ 0 < e.getMessage().indexOf(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY));
}
}
-
- /** Test that bogus arguments result in exception */
+
+ /**
+ * Test that bogus arguments result in exception
+ */
public void testBogusArguments() throws Exception {
try {
- tokenFilterFactory("LimitTokenCount",
- "maxTokenCount", "3",
+ tokenFilterFactory("LimitTokenCount",
+ LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY, "3",
"bogusArg", "bogusValue");
fail();
} catch (IllegalArgumentException expected) {
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java Fri Apr 4 10:27:05 2014
@@ -16,10 +16,6 @@ package org.apache.lucene.analysis.misce
* limitations under the License.
*/
-import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
@@ -27,11 +23,15 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.CharsRef;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.StringReader;
public class TestLimitTokenPositionFilter extends BaseTokenStreamTestCase {
public void testMaxPosition2() throws IOException {
- for (final boolean consumeAll : new boolean[] { true, false }) {
+ for (final boolean consumeAll : new boolean[]{true, false}) {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
@@ -42,43 +42,50 @@ public class TestLimitTokenPositionFilte
}
};
- // dont use assertAnalyzesTo here, as the end offset is not the end of the string (unless consumeAll is true, in which case its correct)!
- assertTokenStreamContents(a.tokenStream("dummy", "1 2 3 4 5"),
- new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 16 : null);
- assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")),
- new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, consumeAll ? 9 : null);
+ // don't use assertAnalyzesTo here, as the end offset is not the end of the string (unless consumeAll is true, in which case it's correct)!
+ assertTokenStreamContents(a.tokenStream("dummy", "1 2 3 4 5"),
+ new String[]{"1", "2"}, new int[]{0, 3}, new int[]{1, 4}, consumeAll ? 16 : null);
+ assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")),
+ new String[]{"1", "2"}, new int[]{0, 2}, new int[]{1, 3}, consumeAll ? 9 : null);
// less than the limit, ensure we behave correctly
assertTokenStreamContents(a.tokenStream("dummy", "1 "),
- new String[] { "1" }, new int[] { 0 }, new int[] { 1 }, consumeAll ? 3 : null);
-
+ new String[]{"1"}, new int[]{0}, new int[]{1}, consumeAll ? 3 : null);
+
// equal to limit
- assertTokenStreamContents(a.tokenStream("dummy", "1 2 "),
- new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 6 : null);
+ assertTokenStreamContents(a.tokenStream("dummy", "1 2 "),
+ new String[]{"1", "2"}, new int[]{0, 3}, new int[]{1, 4}, consumeAll ? 6 : null);
}
}
-
+
public void testMaxPosition3WithSynomyms() throws IOException {
- MockTokenizer tokenizer = whitespaceMockTokenizer("one two three four five");
- tokenizer.setEnableChecks(false); // LimitTokenPositionFilter doesn't consume the entire stream that it wraps
-
- SynonymMap.Builder builder = new SynonymMap.Builder(true);
- builder.add(new CharsRef("one"), new CharsRef("first"), true);
- builder.add(new CharsRef("one"), new CharsRef("alpha"), true);
- builder.add(new CharsRef("one"), new CharsRef("beguine"), true);
- CharsRef multiWordCharsRef = new CharsRef();
- SynonymMap.Builder.join(new String[] { "and", "indubitably", "single", "only" }, multiWordCharsRef);
- builder.add(new CharsRef("one"), multiWordCharsRef, true);
- SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef);
- builder.add(new CharsRef("two"), multiWordCharsRef, true);
- SynonymMap synonymMap = builder.build();
- TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true);
- stream = new LimitTokenPositionFilter(stream, 3); // consumeAllTokens defaults to false
-
- // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
- assertTokenStreamContents(stream,
- new String[] { "one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger" },
- new int[] { 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0 });
-
+ for (final boolean consumeAll : new boolean[]{true, false}) {
+ MockTokenizer tokenizer = whitespaceMockTokenizer("one two three four five");
+ // if we are consuming all tokens, we can use the checks, otherwise we can't
+ tokenizer.setEnableChecks(consumeAll);
+
+ SynonymMap.Builder builder = new SynonymMap.Builder(true);
+ builder.add(new CharsRef("one"), new CharsRef("first"), true);
+ builder.add(new CharsRef("one"), new CharsRef("alpha"), true);
+ builder.add(new CharsRef("one"), new CharsRef("beguine"), true);
+ CharsRef multiWordCharsRef = new CharsRef();
+ SynonymMap.Builder.join(new String[]{"and", "indubitably", "single", "only"}, multiWordCharsRef);
+ builder.add(new CharsRef("one"), multiWordCharsRef, true);
+ SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef);
+ builder.add(new CharsRef("two"), multiWordCharsRef, true);
+ SynonymMap synonymMap = builder.build();
+ TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true);
+ stream = new LimitTokenPositionFilter(stream, 3, consumeAll);
+
+ // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
+ assertTokenStreamContents(stream,
+ new String[]{"one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger"},
+ new int[]{1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0});
+ }
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testIllegalArguments() throws Exception {
+ new LimitTokenPositionFilter(whitespaceMockTokenizer("one two three four five"), 0);
}
}
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java Fri Apr 4 10:27:05 2014
@@ -16,26 +16,30 @@ package org.apache.lucene.analysis.misce
* limitations under the License.
*/
-import java.io.Reader;
-import java.io.StringReader;
-
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+import java.io.Reader;
+import java.io.StringReader;
+
public class TestLimitTokenPositionFilterFactory extends BaseTokenStreamFactoryTestCase {
public void testMaxPosition1() throws Exception {
- Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
- MockTokenizer tokenizer = whitespaceMockTokenizer(reader);
- // LimitTokenPositionFilter doesn't consume the entire stream that it wraps
- tokenizer.setEnableChecks(false);
- TokenStream stream = tokenizer;
- stream = tokenFilterFactory("LimitTokenPosition",
- "maxTokenPosition", "1").create(stream);
- assertTokenStreamContents(stream, new String[] { "A1" });
+ for (final boolean consumeAll : new boolean[]{true, false}) {
+ Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
+ MockTokenizer tokenizer = whitespaceMockTokenizer(reader);
+ // if we are consuming all tokens, we can use the checks, otherwise we can't
+ tokenizer.setEnableChecks(consumeAll);
+ TokenStream stream = tokenizer;
+ stream = tokenFilterFactory("LimitTokenPosition",
+ LimitTokenPositionFilterFactory.MAX_TOKEN_POSITION_KEY, "1",
+ LimitTokenPositionFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll)
+ ).create(stream);
+ assertTokenStreamContents(stream, new String[]{"A1"});
+ }
}
-
+
public void testMissingParam() throws Exception {
try {
tokenFilterFactory("LimitTokenPosition");
@@ -47,34 +51,31 @@ public class TestLimitTokenPositionFilte
}
public void testMaxPosition1WithShingles() throws Exception {
- Reader reader = new StringReader("one two three four five");
- MockTokenizer tokenizer = whitespaceMockTokenizer(reader);
- // LimitTokenPositionFilter doesn't consume the entire stream that it wraps
- tokenizer.setEnableChecks(false);
- TokenStream stream = tokenizer;
- stream = tokenFilterFactory("Shingle",
- "minShingleSize", "2",
- "maxShingleSize", "3",
- "outputUnigrams", "true").create(stream);
- stream = tokenFilterFactory("LimitTokenPosition",
- "maxTokenPosition", "1").create(stream);
- assertTokenStreamContents(stream, new String[] { "one", "one two", "one two three" });
- }
-
- public void testConsumeAllTokens() throws Exception {
- Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
- TokenStream stream = whitespaceMockTokenizer(reader);
- stream = tokenFilterFactory("LimitTokenPosition",
- "maxTokenPosition", "3",
- "consumeAllTokens", "true").create(stream);
- assertTokenStreamContents(stream, new String[] { "A1", "B2", "C3" });
+ for (final boolean consumeAll : new boolean[]{true, false}) {
+ Reader reader = new StringReader("one two three four five");
+ MockTokenizer tokenizer = whitespaceMockTokenizer(reader);
+ // if we are consuming all tokens, we can use the checks, otherwise we can't
+ tokenizer.setEnableChecks(consumeAll);
+ TokenStream stream = tokenizer;
+ stream = tokenFilterFactory("Shingle",
+ "minShingleSize", "2",
+ "maxShingleSize", "3",
+ "outputUnigrams", "true").create(stream);
+ stream = tokenFilterFactory("LimitTokenPosition",
+ LimitTokenPositionFilterFactory.MAX_TOKEN_POSITION_KEY, "1",
+ LimitTokenPositionFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll)
+ ).create(stream);
+ assertTokenStreamContents(stream, new String[]{"one", "one two", "one two three"});
+ }
}
-
- /** Test that bogus arguments result in exception */
+
+ /**
+ * Test that bogus arguments result in exception
+ */
public void testBogusArguments() throws Exception {
try {
- tokenFilterFactory("LimitTokenPosition",
- "maxTokenPosition", "3",
+ tokenFilterFactory("LimitTokenPosition",
+ "maxTokenPosition", "3",
"bogusArg", "bogusValue");
fail();
} catch (IllegalArgumentException expected) {
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java Fri Apr 4 10:27:05 2014
@@ -25,6 +25,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
public class DelimitedPayloadTokenFilterTest extends BaseTokenStreamTestCase {
@@ -37,15 +38,15 @@ public class DelimitedPayloadTokenFilter
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
filter.reset();
assertTermEquals("The", filter, termAtt, payAtt, null);
- assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
- assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
- assertTermEquals("fox", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));
- assertTermEquals("jumped", filter, termAtt, payAtt, "VB".getBytes("UTF-8"));
+ assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes(StandardCharsets.UTF_8));
+ assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes(StandardCharsets.UTF_8));
+ assertTermEquals("fox", filter, termAtt, payAtt, "NN".getBytes(StandardCharsets.UTF_8));
+ assertTermEquals("jumped", filter, termAtt, payAtt, "VB".getBytes(StandardCharsets.UTF_8));
assertTermEquals("over", filter, termAtt, payAtt, null);
assertTermEquals("the", filter, termAtt, payAtt, null);
- assertTermEquals("lazy", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
- assertTermEquals("brown", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
- assertTermEquals("dogs", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));
+ assertTermEquals("lazy", filter, termAtt, payAtt, "JJ".getBytes(StandardCharsets.UTF_8));
+ assertTermEquals("brown", filter, termAtt, payAtt, "JJ".getBytes(StandardCharsets.UTF_8));
+ assertTermEquals("dogs", filter, termAtt, payAtt, "NN".getBytes(StandardCharsets.UTF_8));
assertFalse(filter.incrementToken());
filter.end();
filter.close();
@@ -59,15 +60,15 @@ public class DelimitedPayloadTokenFilter
DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
filter.reset();
assertTermEquals("The", filter, null);
- assertTermEquals("quick", filter, "JJ".getBytes("UTF-8"));
- assertTermEquals("red", filter, "JJ".getBytes("UTF-8"));
- assertTermEquals("fox", filter, "NN".getBytes("UTF-8"));
- assertTermEquals("jumped", filter, "VB".getBytes("UTF-8"));
+ assertTermEquals("quick", filter, "JJ".getBytes(StandardCharsets.UTF_8));
+ assertTermEquals("red", filter, "JJ".getBytes(StandardCharsets.UTF_8));
+ assertTermEquals("fox", filter, "NN".getBytes(StandardCharsets.UTF_8));
+ assertTermEquals("jumped", filter, "VB".getBytes(StandardCharsets.UTF_8));
assertTermEquals("over", filter, null);
assertTermEquals("the", filter, null);
- assertTermEquals("lazy", filter, "JJ".getBytes("UTF-8"));
- assertTermEquals("brown", filter, "JJ".getBytes("UTF-8"));
- assertTermEquals("dogs", filter, "NN".getBytes("UTF-8"));
+ assertTermEquals("lazy", filter, "JJ".getBytes(StandardCharsets.UTF_8));
+ assertTermEquals("brown", filter, "JJ".getBytes(StandardCharsets.UTF_8));
+ assertTermEquals("dogs", filter, "NN".getBytes(StandardCharsets.UTF_8));
assertFalse(filter.incrementToken());
filter.end();
filter.close();
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java Fri Apr 4 10:27:05 2014
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokena
import java.io.IOException;
import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
public class TypeAsPayloadTokenFilterTest extends BaseTokenStreamTestCase {
@@ -41,8 +42,8 @@ public class TypeAsPayloadTokenFilterTes
while (nptf.incrementToken()) {
assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.buffer()[0]))));
assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
- String type = new String(payloadAtt.getPayload().bytes, "UTF-8");
- assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()) == true);
+ String type = payloadAtt.getPayload().utf8ToString();
+ assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()));
count++;
}
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java Fri Apr 4 10:27:05 2014
@@ -1,11 +1,12 @@
package org.apache.lucene.analysis.sinks;
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
@@ -21,6 +22,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
+import org.junit.Test;
public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase {
@@ -29,20 +31,25 @@ public class TokenRangeSinkTokenizerTest
String test = "The quick red fox jumped over the lazy brown dogs";
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(whitespaceMockTokenizer(test));
TeeSinkTokenFilter.SinkTokenStream rangeToks = tee.newSinkTokenStream(sinkFilter);
-
+
int count = 0;
tee.reset();
while(tee.incrementToken()) {
count++;
}
-
+
int sinkCount = 0;
rangeToks.reset();
while (rangeToks.incrementToken()) {
sinkCount++;
}
-
+
assertTrue(count + " does not equal: " + 10, count == 10);
assertTrue("rangeToks Size: " + sinkCount + " is not: " + 2, sinkCount == 2);
}
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testIllegalArguments() throws Exception {
+ new TokenRangeSinkFilter(4, 2);
+ }
}
\ No newline at end of file
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java Fri Apr 4 10:27:05 2014
@@ -172,4 +172,13 @@ public class TestUAX29URLEmailTokenizerF
assertTrue(expected.getMessage().contains("Unknown parameters"));
}
}
+
+ public void testIllegalArguments() throws Exception {
+ try {
+ tokenizerFactory("UAX29URLEmail", "maxTokenLength", "-1").create();
+ fail();
+ } catch (IllegalArgumentException expected) {
+ assertTrue(expected.getMessage().contains("maxTokenLength must be greater than zero"));
+ }
+ }
}
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java Fri Apr 4 10:27:05 2014
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.util;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
/** Fake resource loader for tests: works if you want to fake reading a single file */
public class StringMockResourceLoader implements ResourceLoader {
@@ -50,6 +51,6 @@ public class StringMockResourceLoader im
@Override
public InputStream openResource(String resource) throws IOException {
- return new ByteArrayInputStream(text.getBytes("UTF-8"));
+ return new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
}
}
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java Fri Apr 4 10:27:05 2014
@@ -23,6 +23,7 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
+import java.nio.charset.StandardCharsets;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
@@ -49,7 +50,7 @@ public class TestFilesystemResourceLoade
private void assertClasspathDelegation(ResourceLoader rl) throws Exception {
// try a stopwords file from classpath
CharArraySet set = WordlistLoader.getSnowballWordSet(
- new InputStreamReader(rl.openResource("org/apache/lucene/analysis/snowball/english_stop.txt"), IOUtils.CHARSET_UTF_8),
+ new InputStreamReader(rl.openResource("org/apache/lucene/analysis/snowball/english_stop.txt"), StandardCharsets.UTF_8),
TEST_VERSION_CURRENT
);
assertTrue(set.contains("you"));
@@ -64,7 +65,7 @@ public class TestFilesystemResourceLoade
final File base = TestUtil.createTempDir("fsResourceLoaderBase").getAbsoluteFile();
try {
base.mkdirs();
- Writer os = new OutputStreamWriter(new FileOutputStream(new File(base, "template.txt")), IOUtils.CHARSET_UTF_8);
+ Writer os = new OutputStreamWriter(new FileOutputStream(new File(base, "template.txt")), StandardCharsets.UTF_8);
try {
os.write("foobar\n");
} finally {
@@ -72,28 +73,28 @@ public class TestFilesystemResourceLoade
}
ResourceLoader rl = new FilesystemResourceLoader(base);
- assertEquals("foobar", WordlistLoader.getLines(rl.openResource("template.txt"), IOUtils.CHARSET_UTF_8).get(0));
+ assertEquals("foobar", WordlistLoader.getLines(rl.openResource("template.txt"), StandardCharsets.UTF_8).get(0));
// Same with full path name:
String fullPath = new File(base, "template.txt").toString();
assertEquals("foobar",
- WordlistLoader.getLines(rl.openResource(fullPath), IOUtils.CHARSET_UTF_8).get(0));
+ WordlistLoader.getLines(rl.openResource(fullPath), StandardCharsets.UTF_8).get(0));
assertClasspathDelegation(rl);
assertNotFound(rl);
// now use RL without base dir:
rl = new FilesystemResourceLoader();
assertEquals("foobar",
- WordlistLoader.getLines(rl.openResource(new File(base, "template.txt").toString()), IOUtils.CHARSET_UTF_8).get(0));
+ WordlistLoader.getLines(rl.openResource(new File(base, "template.txt").toString()), StandardCharsets.UTF_8).get(0));
assertClasspathDelegation(rl);
assertNotFound(rl);
} finally {
- TestUtil.rmDir(base);
+ TestUtil.rm(base);
}
}
public void testDelegation() throws Exception {
ResourceLoader rl = new FilesystemResourceLoader(null, new StringMockResourceLoader("foobar\n"));
- assertEquals("foobar", WordlistLoader.getLines(rl.openResource("template.txt"), IOUtils.CHARSET_UTF_8).get(0));
+ assertEquals("foobar", WordlistLoader.getLines(rl.openResource("template.txt"), StandardCharsets.UTF_8).get(0));
}
}
Modified: lucene/dev/branches/solr5914/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java Fri Apr 4 10:27:05 2014
@@ -25,6 +25,7 @@ import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URL;
import java.net.URLConnection;
+import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.util.Date;
import java.util.Locale;
@@ -118,7 +119,7 @@ public class GenerateJflexTLDMacros {
connection.connect();
tldFileLastModified = connection.getLastModified();
BufferedReader reader = new BufferedReader
- (new InputStreamReader(connection.getInputStream(), "US-ASCII"));
+ (new InputStreamReader(connection.getInputStream(), StandardCharsets.US_ASCII));
try {
String line;
while (null != (line = reader.readLine())) {
@@ -150,7 +151,7 @@ public class GenerateJflexTLDMacros {
(DateFormat.FULL, DateFormat.FULL, Locale.ROOT);
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
final Writer writer = new OutputStreamWriter
- (new FileOutputStream(outputFile), "UTF-8");
+ (new FileOutputStream(outputFile), StandardCharsets.UTF_8);
try {
writer.write(APACHE_LICENSE);
writer.write("// Generated from IANA Root Zone Database <");
Modified: lucene/dev/branches/solr5914/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java Fri Apr 4 10:27:05 2014
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.icu.s
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
-import java.io.Reader;
+import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -132,7 +132,7 @@ public class ICUTokenizerFactory extends
StringBuilder rules = new StringBuilder();
InputStream rulesStream = loader.openResource(filename);
BufferedReader reader = new BufferedReader
- (IOUtils.getDecodingReader(rulesStream, IOUtils.CHARSET_UTF_8));
+ (IOUtils.getDecodingReader(rulesStream, StandardCharsets.UTF_8));
String line = null;
while ((line = reader.readLine()) != null) {
if ( ! line.startsWith("#"))
Modified: lucene/dev/branches/solr5914/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java Fri Apr 4 10:27:05 2014
@@ -35,6 +35,7 @@ import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URL;
import java.net.URLConnection;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
@@ -106,7 +107,7 @@ public class GenerateUTR30DataFiles {
private static void expandDataFileRules(File file) throws IOException {
final FileInputStream stream = new FileInputStream(file);
- final InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
+ final InputStreamReader reader = new InputStreamReader(stream, StandardCharsets.UTF_8);
final BufferedReader bufferedReader = new BufferedReader(reader);
StringBuilder builder = new StringBuilder();
String line;
@@ -154,7 +155,7 @@ public class GenerateUTR30DataFiles {
if (modified) {
System.err.println("Expanding rules in and overwriting " + file.getName());
final FileOutputStream out = new FileOutputStream(file, false);
- Writer writer = new OutputStreamWriter(out, "UTF-8");
+ Writer writer = new OutputStreamWriter(out, StandardCharsets.UTF_8);
try {
writer.write(builder.toString());
} finally {
@@ -178,8 +179,8 @@ public class GenerateUTR30DataFiles {
System.err.print("Downloading " + NFKC_CF_TXT + " and making diacritic rules one-way ... ");
URLConnection connection = openConnection(new URL(norm2url, NFC_TXT));
BufferedReader reader = new BufferedReader
- (new InputStreamReader(connection.getInputStream(), "UTF-8"));
- Writer writer = new OutputStreamWriter(new FileOutputStream(NFC_TXT), "UTF-8");
+ (new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8));
+ Writer writer = new OutputStreamWriter(new FileOutputStream(NFC_TXT), StandardCharsets.UTF_8);
try {
String line;
Modified: lucene/dev/branches/solr5914/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/RBBIRuleCompiler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/RBBIRuleCompiler.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/RBBIRuleCompiler.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/RBBIRuleCompiler.java Fri Apr 4 10:27:05 2014
@@ -25,6 +25,7 @@ import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import com.ibm.icu.text.RuleBasedBreakIterator;
@@ -37,7 +38,7 @@ public class RBBIRuleCompiler {
static String getRules(File ruleFile) throws IOException {
StringBuilder rules = new StringBuilder();
InputStream in = new FileInputStream(ruleFile);
- BufferedReader cin = new BufferedReader(new InputStreamReader(in, "UTF-8"));
+ BufferedReader cin = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
String line = null;
while ((line = cin.readLine()) != null) {
if (!line.startsWith("#"))
Modified: lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java Fri Apr 4 10:27:05 2014
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.ja;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.util.ResourceLoader;
@@ -52,6 +53,6 @@ class StringMockResourceLoader implement
@Override
public InputStream openResource(String resource) throws IOException {
- return new ByteArrayInputStream(text.getBytes("UTF-8"));
+ return new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
}
}
Modified: lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java Fri Apr 4 10:27:05 2014
@@ -22,6 +22,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.Reader;
+import java.nio.charset.StandardCharsets;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
@@ -34,7 +35,6 @@ import org.apache.lucene.analysis.ja.dic
import org.apache.lucene.analysis.ja.dict.UserDictionary;
import org.apache.lucene.analysis.ja.tokenattributes.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.LuceneTestCase.Slow;
@@ -49,7 +49,7 @@ public class TestJapaneseTokenizer exten
}
try {
try {
- Reader reader = new InputStreamReader(is, IOUtils.CHARSET_UTF_8);
+ Reader reader = new InputStreamReader(is, StandardCharsets.UTF_8);
return new UserDictionary(reader);
} finally {
is.close();
@@ -571,7 +571,7 @@ public class TestJapaneseTokenizer exten
/*
public void testWikipedia() throws Exception {
final FileInputStream fis = new FileInputStream("/q/lucene/jawiki-20120220-pages-articles.xml");
- final Reader r = new BufferedReader(new InputStreamReader(fis, "UTF-8"));
+ final Reader r = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8));
final long startTimeNS = System.nanoTime();
boolean done = false;
@@ -618,7 +618,7 @@ public class TestJapaneseTokenizer exten
private void doTestBocchan(int numIterations) throws Exception {
LineNumberReader reader = new LineNumberReader(new InputStreamReader(
- this.getClass().getResourceAsStream("bocchan.utf-8"), "UTF-8"));
+ this.getClass().getResourceAsStream("bocchan.utf-8"), StandardCharsets.UTF_8));
String line = reader.readLine();
reader.close();
Modified: lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestSearchMode.java Fri Apr 4 10:27:05 2014
@@ -22,13 +22,12 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
-import java.io.Reader;
+import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
-import org.apache.lucene.util.IOUtils;
public class TestSearchMode extends BaseTokenStreamTestCase {
private final static String SEGMENTATION_FILENAME = "search-segmentation-tests.txt";
@@ -47,7 +46,7 @@ public class TestSearchMode extends Base
throw new FileNotFoundException("Cannot find " + SEGMENTATION_FILENAME + " in test classpath");
}
try {
- LineNumberReader reader = new LineNumberReader(new InputStreamReader(is, IOUtils.CHARSET_UTF_8));
+ LineNumberReader reader = new LineNumberReader(new InputStreamReader(is, StandardCharsets.UTF_8));
String line = null;
while ((line = reader.readLine()) != null) {
// Remove comments
Modified: lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java Fri Apr 4 10:27:05 2014
@@ -24,6 +24,7 @@ import java.io.LineNumberReader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
public class ConnectionCostsBuilder {
@@ -32,7 +33,7 @@ public class ConnectionCostsBuilder {
public static ConnectionCostsWriter build(String filename) throws IOException {
FileInputStream inputStream = new FileInputStream(filename);
- Charset cs = Charset.forName("US-ASCII");
+ Charset cs = StandardCharsets.US_ASCII;
CharsetDecoder decoder = cs.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
Modified: lucene/dev/branches/solr5914/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/AnalyzerProfile.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/AnalyzerProfile.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/AnalyzerProfile.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/AnalyzerProfile.java Fri Apr 4 10:27:05 2014
@@ -21,10 +21,9 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.util.Properties;
-import org.apache.lucene.util.IOUtils;
-
/**
* Manages analysis data configuration for SmartChineseAnalyzer
* <p>
@@ -80,7 +79,7 @@ public class AnalyzerProfile {
Properties prop = new Properties();
try {
FileInputStream input = new FileInputStream(propFile);
- prop.load(new InputStreamReader(input, IOUtils.CHARSET_UTF_8));
+ prop.load(new InputStreamReader(input, StandardCharsets.UTF_8));
String dir = prop.getProperty("analysis.data.dir", "");
input.close();
return dir;
Modified: lucene/dev/branches/solr5914/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java Fri Apr 4 10:27:05 2014
@@ -18,18 +18,16 @@
package org.apache.lucene.analysis.cn.smart;
import java.io.IOException;
-import java.io.Reader;
+import java.nio.charset.StandardCharsets;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.en.PorterStemFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.cn.smart.SentenceTokenizer;
-import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
@@ -90,7 +88,7 @@ public final class SmartChineseAnalyzer
// make sure it is unmodifiable as we expose it in the outer class
return CharArraySet.unmodifiableSet(WordlistLoader.getWordSet(IOUtils
.getDecodingReader(SmartChineseAnalyzer.class, DEFAULT_STOPWORD_FILE,
- IOUtils.CHARSET_UTF_8), STOPWORD_FILE_COMMENT,
+ StandardCharsets.UTF_8), STOPWORD_FILE_COMMENT,
Version.LUCENE_CURRENT));
}
}
Modified: lucene/dev/branches/solr5914/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java Fri Apr 4 10:27:05 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.pl;
import java.io.IOException;
import java.io.Reader;
+import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@@ -76,7 +77,7 @@ public final class PolishAnalyzer extend
static {
try {
DEFAULT_STOP_SET = WordlistLoader.getWordSet(IOUtils.getDecodingReader(PolishAnalyzer.class,
- DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8), "#", Version.LUCENE_CURRENT);
+ DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8), "#", Version.LUCENE_CURRENT);
} catch (IOException ex) {
// default set should always be present as it is part of the
// distribution (JAR)
Modified: lucene/dev/branches/solr5914/lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr5914/lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java?rev=1584603&r1=1584602&r2=1584603&view=diff
==============================================================================
--- lucene/dev/branches/solr5914/lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java (original)
+++ lucene/dev/branches/solr5914/lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java Fri Apr 4 10:27:05 2014
@@ -65,10 +65,10 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
+import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.StringTokenizer;
-import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
@@ -139,7 +139,7 @@ public class TestCompile extends LuceneT
private static void assertTrie(Trie trie, String file, boolean usefull,
boolean storeorig) throws Exception {
LineNumberReader in = new LineNumberReader(new BufferedReader(
- new InputStreamReader(new FileInputStream(file), IOUtils.CHARSET_UTF_8)));
+ new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)));
for (String line = in.readLine(); line != null; line = in.readLine()) {
try {