You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/07/26 15:43:29 UTC
svn commit: r1365992 - in /lucene/dev/trunk:
lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/
lucene/analysis/common/src/java/org/apache/lucene/analysis/core/
lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneou...
Author: rmuir
Date: Thu Jul 26 13:43:28 2012
New Revision: 1365992
URL: http://svn.apache.org/viewvc?rev=1365992&view=rev
Log:
LUCENE-4257: factor getLines out of ResourceLoader and into WordlistLoader
Added:
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java (with props)
Removed:
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java
lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/ResourceAsStreamResourceLoader.java
Modified:
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java
lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java
lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java
lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java Thu Jul 26 13:43:28 2012
@@ -57,12 +57,12 @@ public class MappingCharFilterFactory ex
List<String> wlist = null;
File mappingFile = new File(mapping);
if (mappingFile.exists()) {
- wlist = loader.getLines(mapping);
+ wlist = getLines(loader, mapping);
} else {
List<String> files = splitFileNames(mapping);
wlist = new ArrayList<String>();
for (String file : files) {
- List<String> lines = loader.getLines(file.trim());
+ List<String> lines = getLines(loader, file.trim());
wlist.addAll(lines);
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java Thu Jul 26 13:43:28 2012
@@ -51,7 +51,7 @@ public class TypeTokenFilterFactory exte
if (files.size() > 0) {
stopTypes = new HashSet<String>();
for (String file : files) {
- List<String> typesLines = loader.getLines(file.trim());
+ List<String> typesLines = getLines(loader, file.trim());
stopTypes.addAll(typesLines);
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java Thu Jul 26 13:43:28 2012
@@ -49,7 +49,7 @@ public class StemmerOverrideFilterFactor
dictionary = new CharArrayMap<String>(luceneMatchVersion,
files.size() * 10, ignoreCase);
for (String file : files) {
- List<String> list = loader.getLines(file.trim());
+ List<String> list = getLines(loader, file.trim());
for (String line : list) {
String[] mapping = line.split("\t", 2);
dictionary.put(mapping[0], mapping[1]);
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java Thu Jul 26 13:43:28 2012
@@ -63,7 +63,7 @@ public class WordDelimiterFilterFactory
List<String> files = splitFileNames( types );
List<String> wlist = new ArrayList<String>();
for( String file : files ){
- List<String> lines = loader.getLines( file.trim() );
+ List<String> lines = getLines(loader, file.trim());
wlist.addAll( lines );
}
typeTable = parseTypes(wlist);
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java Thu Jul 26 13:43:28 2012
@@ -129,13 +129,17 @@ public abstract class AbstractAnalysisFa
words = new CharArraySet(luceneMatchVersion,
files.size() * 10, ignoreCase);
for (String file : files) {
- List<String> wlist = loader.getLines(file.trim());
+ List<String> wlist = getLines(loader, file.trim());
words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist,
ignoreCase));
}
}
return words;
}
+
+ protected List<String> getLines(ResourceLoader loader, String resource) throws IOException {
+ return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8);
+ }
/** same as {@link #getWordSet(ResourceLoader, String, boolean)},
* except the input is in snowball format. */
Added: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java?rev=1365992&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java Thu Jul 26 13:43:28 2012
@@ -0,0 +1,49 @@
+package org.apache.lucene.analysis.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Simple ResourceLoader that uses Class.getResourceAsStream
+ * and Class.forName to open resources and classes, respectively.
+ */
+public class ResourceAsStreamResourceLoader implements ResourceLoader {
+ Class<?> clazz;
+
+ public ResourceAsStreamResourceLoader(Class<?> clazz) {
+ this.clazz = clazz;
+ }
+
+ @Override
+ public InputStream openResource(String resource) throws IOException {
+ return clazz.getResourceAsStream(resource);
+ }
+
+ // TODO: do this subpackages thing... wtf is that?
+ @Override
+ public <T> T newInstance(String cname, Class<T> expectedType, String... subpackages) {
+ try {
+ Class<? extends T> clazz = Class.forName(cname).asSubclass(expectedType);
+ return clazz.newInstance();
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+}
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java Thu Jul 26 13:43:28 2012
@@ -19,29 +19,19 @@ package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.io.InputStream;
-import java.util.List;
/**
* Abstraction for loading resources (streams, files, and classes).
*/
public interface ResourceLoader {
+ /**
+ * Opens a named resource
+ */
public InputStream openResource(String resource) throws IOException;
/**
- * Accesses a resource by name and returns the (non comment) lines
- * containing data.
- *
- * <p>
- * A comment line is any line that starts with the character "#"
- * </p>
- *
- * @param resource
- * @return a list of non-blank non-comment lines with whitespace trimmed
- * from front and back.
- * @throws IOException
+ * Creates a class of the name and expected type
*/
- public List<String> getLines(String resource) throws IOException;
-
public <T> T newInstance(String cname, Class<T> expectedType, String ... subpackages);
}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java Thu Jul 26 13:43:28 2012
@@ -19,7 +19,11 @@ package org.apache.lucene.analysis.util;
import java.io.BufferedReader;
import java.io.IOException;
+import java.io.InputStream;
import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
@@ -194,6 +198,47 @@ public class WordlistLoader {
return result;
}
+ /**
+ * Accesses a resource by name and returns the (non comment) lines containing
+ * data using the given character encoding.
+ *
+ * <p>
+ * A comment line is any line that starts with the character "#"
+ * </p>
+ *
+ * @return a list of non-blank non-comment lines with whitespace trimmed
+ * @throws IOException
+ */
+ public static List<String> getLines(InputStream stream, Charset charset) throws IOException{
+ BufferedReader input = null;
+ ArrayList<String> lines;
+ boolean success = false;
+ try {
+ input = getBufferedReader(IOUtils.getDecodingReader(stream, charset));
+
+ lines = new ArrayList<String>();
+ for (String word=null; (word=input.readLine())!=null;) {
+ // skip initial bom marker
+ if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF')
+ word = word.substring(1);
+ // skip comments
+ if (word.startsWith("#")) continue;
+ word=word.trim();
+ // skip blank lines
+ if (word.length()==0) continue;
+ lines.add(word);
+ }
+ success = true;
+ return lines;
+ } finally {
+ if (success) {
+ IOUtils.close(input);
+ } else {
+ IOUtils.closeWhileHandlingException(input);
+ }
+ }
+ }
+
private static BufferedReader getBufferedReader(Reader reader) {
return (reader instanceof BufferedReader) ? (BufferedReader) reader
: new BufferedReader(reader);
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java Thu Jul 26 13:43:28 2012
@@ -20,8 +20,6 @@ package org.apache.lucene.analysis.util;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.util.Arrays;
-import java.util.List;
/** Fake resource loader for tests: works if you want to fake reading a single file */
public class StringMockResourceLoader implements ResourceLoader {
@@ -31,10 +29,6 @@ public class StringMockResourceLoader im
this.text = text;
}
- public List<String> getLines(String resource) throws IOException {
- return Arrays.asList(text.split("\n"));
- }
-
// TODO: do this subpackages thing... wtf is that?
public <T> T newInstance(String cname, Class<T> expectedType, String... subpackages) {
try {
Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java Thu Jul 26 13:43:28 2012
@@ -33,10 +33,6 @@ class StringMockResourceLoader implement
this.text = text;
}
- public List<String> getLines(String resource) throws IOException {
- return Arrays.asList(text.split("\n"));
- }
-
// TODO: do this subpackages thing... wtf is that?
public <T> T newInstance(String cname, Class<T> expectedType, String... subpackages) {
try {
Modified: lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java Thu Jul 26 13:43:28 2012
@@ -22,6 +22,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
/**
* Tests for {@link StempelPolishStemFilterFactory}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java Thu Jul 26 13:43:28 2012
@@ -37,6 +37,7 @@ import org.apache.lucene.analysis.util.R
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.analysis.util.AnalysisSPILoader;
+import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.WeakIdentityMap;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.handler.admin.CoreAdminHandler;
@@ -337,34 +338,12 @@ public class SolrResourceLoader implemen
public List<String> getLines(String resource, Charset charset) throws IOException{
- BufferedReader input = null;
- ArrayList<String> lines;
try {
- input = new BufferedReader(new InputStreamReader(openResource(resource),
- charset.newDecoder()
- .onMalformedInput(CodingErrorAction.REPORT)
- .onUnmappableCharacter(CodingErrorAction.REPORT)));
-
- lines = new ArrayList<String>();
- for (String word=null; (word=input.readLine())!=null;) {
- // skip initial bom marker
- if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF')
- word = word.substring(1);
- // skip comments
- if (word.startsWith("#")) continue;
- word=word.trim();
- // skip blank lines
- if (word.length()==0) continue;
- lines.add(word);
- }
+ return WordlistLoader.getLines(openResource(resource), charset);
} catch (CharacterCodingException ex) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
- "Error loading resource (wrong encoding?): " + resource, ex);
- } finally {
- if (input != null)
- input.close();
+ "Error loading resource (wrong encoding?): " + resource, ex);
}
- return lines;
}
/*
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java Thu Jul 26 13:43:28 2012
@@ -55,10 +55,6 @@ public class TestMultiWordSynonyms exten
this.text = text;
}
- public List<String> getLines(String resource) throws IOException {
- return null;
- }
-
public <T> T newInstance(String cname, Class<T> expectedType, String... subpackages) {
return null;
}
Modified: lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java (original)
+++ lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java Thu Jul 26 13:43:28 2012
@@ -32,10 +32,6 @@ class StringMockSolrResourceLoader imple
this.text = text;
}
- public List<String> getLines(String resource) throws IOException {
- return Arrays.asList(text.split("\n"));
- }
-
public <T> T newInstance(String cname, Class<T> expectedType, String... subpackages) {
return null;
}