You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/07/26 15:43:29 UTC

svn commit: r1365992 - in /lucene/dev/trunk: lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/ lucene/analysis/common/src/java/org/apache/lucene/analysis/core/ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneou...

Author: rmuir
Date: Thu Jul 26 13:43:28 2012
New Revision: 1365992

URL: http://svn.apache.org/viewvc?rev=1365992&view=rev
Log:
LUCENE-4257: factor the getLines in ResourceLoader into WordlistLoader

Added:
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java   (with props)
Removed:
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java
    lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/ResourceAsStreamResourceLoader.java
Modified:
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java
    lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java
    lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java
    lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java Thu Jul 26 13:43:28 2012
@@ -57,12 +57,12 @@ public class MappingCharFilterFactory ex
       List<String> wlist = null;
       File mappingFile = new File(mapping);
       if (mappingFile.exists()) {
-        wlist = loader.getLines(mapping);
+        wlist = getLines(loader, mapping);
       } else {
         List<String> files = splitFileNames(mapping);
         wlist = new ArrayList<String>();
         for (String file : files) {
-          List<String> lines = loader.getLines(file.trim());
+          List<String> lines = getLines(loader, file.trim());
           wlist.addAll(lines);
         }
       }

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java Thu Jul 26 13:43:28 2012
@@ -51,7 +51,7 @@ public class TypeTokenFilterFactory exte
       if (files.size() > 0) {
         stopTypes = new HashSet<String>();
         for (String file : files) {
-          List<String> typesLines = loader.getLines(file.trim());
+          List<String> typesLines = getLines(loader, file.trim());
           stopTypes.addAll(typesLines);
         }
       }

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java Thu Jul 26 13:43:28 2012
@@ -49,7 +49,7 @@ public class StemmerOverrideFilterFactor
         dictionary = new CharArrayMap<String>(luceneMatchVersion, 
             files.size() * 10, ignoreCase);
         for (String file : files) {
-          List<String> list = loader.getLines(file.trim());
+          List<String> list = getLines(loader, file.trim());
           for (String line : list) {
             String[] mapping = line.split("\t", 2);
             dictionary.put(mapping[0], mapping[1]);

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java Thu Jul 26 13:43:28 2012
@@ -63,7 +63,7 @@ public class WordDelimiterFilterFactory 
       List<String> files = splitFileNames( types );
       List<String> wlist = new ArrayList<String>();
       for( String file : files ){
-        List<String> lines = loader.getLines( file.trim() );
+        List<String> lines = getLines(loader, file.trim());
         wlist.addAll( lines );
       }
       typeTable = parseTypes(wlist);

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java Thu Jul 26 13:43:28 2012
@@ -129,13 +129,17 @@ public abstract class AbstractAnalysisFa
       words = new CharArraySet(luceneMatchVersion,
           files.size() * 10, ignoreCase);
       for (String file : files) {
-        List<String> wlist = loader.getLines(file.trim());
+        List<String> wlist = getLines(loader, file.trim());
         words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist,
             ignoreCase));
       }
     }
     return words;
   }
+  
+  protected List<String> getLines(ResourceLoader loader, String resource) throws IOException {
+    return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8);
+  }
 
   /** same as {@link #getWordSet(ResourceLoader, String, boolean)},
    * except the input is in snowball format. */

Added: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java?rev=1365992&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java (added)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java Thu Jul 26 13:43:28 2012
@@ -0,0 +1,49 @@
+package org.apache.lucene.analysis.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Simple ResourceLoader that uses Class.getResourceAsStream
+ * and Class.forName to open resources and classes, respectively.
+ */
+public class ResourceAsStreamResourceLoader implements ResourceLoader {
+  Class<?> clazz;
+  
+  public ResourceAsStreamResourceLoader(Class<?> clazz) {
+    this.clazz = clazz;
+  }
+
+  @Override
+  public InputStream openResource(String resource) throws IOException {
+    return clazz.getResourceAsStream(resource);
+  }
+
+  // TODO: do this subpackages thing... wtf is that?
+  @Override
+  public <T> T newInstance(String cname, Class<T> expectedType, String... subpackages) {
+    try {
+      Class<? extends T> clazz = Class.forName(cname).asSubclass(expectedType);
+      return clazz.newInstance();
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+}

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java Thu Jul 26 13:43:28 2012
@@ -19,29 +19,19 @@ package org.apache.lucene.analysis.util;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.List;
 
 /**
  * Abstraction for loading resources (streams, files, and classes).
  */
 public interface ResourceLoader {
 
+  /**
+   * Opens a named resource
+   */
   public InputStream openResource(String resource) throws IOException;
   
   /**
-   * Accesses a resource by name and returns the (non comment) lines
-   * containing data.
-   *
-   * <p>
-   * A comment line is any line that starts with the character "#"
-   * </p>
-   *
-   * @param resource
-   * @return a list of non-blank non-comment lines with whitespace trimmed
-   * from front and back.
-   * @throws IOException
+   * Creates a class of the name and expected type
    */
-  public List<String> getLines(String resource) throws IOException;
-  
   public <T> T newInstance(String cname, Class<T> expectedType, String ... subpackages);
 }
\ No newline at end of file

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java Thu Jul 26 13:43:28 2012
@@ -19,7 +19,11 @@ package org.apache.lucene.analysis.util;
 
 import java.io.BufferedReader;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.Version;
@@ -194,6 +198,47 @@ public class WordlistLoader {
     return result;
   }
   
+  /**
+   * Reads the given stream and returns the (non comment) lines containing
+   * data, decoded using the given character encoding.
+   *
+   * <p>
+   * A comment line is any line that starts with the character "#"
+   * </p>
+   *
+   * @return a list of non-blank non-comment lines with whitespace trimmed
+   * @throws IOException
+   */
+  public static List<String> getLines(InputStream stream, Charset charset) throws IOException{
+    BufferedReader input = null;
+    ArrayList<String> lines;
+    boolean success = false;
+    try {
+      input = getBufferedReader(IOUtils.getDecodingReader(stream, charset));
+
+      lines = new ArrayList<String>();
+      for (String word=null; (word=input.readLine())!=null;) {
+        // skip initial bom marker
+        if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF')
+          word = word.substring(1);
+        // skip comments
+        if (word.startsWith("#")) continue;
+        word=word.trim();
+        // skip blank lines
+        if (word.length()==0) continue;
+        lines.add(word);
+      }
+      success = true;
+      return lines;
+    } finally {
+      if (success) {
+        IOUtils.close(input);
+      } else {
+        IOUtils.closeWhileHandlingException(input);
+      }
+    }
+  }
+  
   private static BufferedReader getBufferedReader(Reader reader) {
     return (reader instanceof BufferedReader) ? (BufferedReader) reader
         : new BufferedReader(reader);

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java Thu Jul 26 13:43:28 2012
@@ -20,8 +20,6 @@ package org.apache.lucene.analysis.util;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.Arrays;
-import java.util.List;
 
 /** Fake resource loader for tests: works if you want to fake reading a single file */
 public class StringMockResourceLoader implements ResourceLoader {
@@ -31,10 +29,6 @@ public class StringMockResourceLoader im
     this.text = text;
   }
 
-  public List<String> getLines(String resource) throws IOException {
-    return Arrays.asList(text.split("\n"));
-  }
-
   // TODO: do this subpackages thing... wtf is that?
   public <T> T newInstance(String cname, Class<T> expectedType, String... subpackages) {
     try {

Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java Thu Jul 26 13:43:28 2012
@@ -33,10 +33,6 @@ class StringMockResourceLoader implement
     this.text = text;
   }
 
-  public List<String> getLines(String resource) throws IOException {
-    return Arrays.asList(text.split("\n"));
-  }
-
   // TODO: do this subpackages thing... wtf is that?
   public <T> T newInstance(String cname, Class<T> expectedType, String... subpackages) {
     try {

Modified: lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java (original)
+++ lucene/dev/trunk/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java Thu Jul 26 13:43:28 2012
@@ -22,6 +22,7 @@ import java.io.StringReader;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
 
 /**
  * Tests for {@link StempelPolishStemFilterFactory}

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java Thu Jul 26 13:43:28 2012
@@ -37,6 +37,7 @@ import org.apache.lucene.analysis.util.R
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 import org.apache.lucene.analysis.util.TokenizerFactory;
 import org.apache.lucene.analysis.util.AnalysisSPILoader;
+import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.WeakIdentityMap;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.handler.admin.CoreAdminHandler;
@@ -337,34 +338,12 @@ public class SolrResourceLoader implemen
 
 
   public List<String> getLines(String resource, Charset charset) throws IOException{
-    BufferedReader input = null;
-    ArrayList<String> lines;
     try {
-      input = new BufferedReader(new InputStreamReader(openResource(resource),
-          charset.newDecoder()
-          .onMalformedInput(CodingErrorAction.REPORT)
-          .onUnmappableCharacter(CodingErrorAction.REPORT)));
-
-      lines = new ArrayList<String>();
-      for (String word=null; (word=input.readLine())!=null;) {
-        // skip initial bom marker
-        if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF')
-          word = word.substring(1);
-        // skip comments
-        if (word.startsWith("#")) continue;
-        word=word.trim();
-        // skip blank lines
-        if (word.length()==0) continue;
-        lines.add(word);
-      }
+      return WordlistLoader.getLines(openResource(resource), charset);
     } catch (CharacterCodingException ex) {
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, 
-          "Error loading resource (wrong encoding?): " + resource, ex);
-    } finally {
-      if (input != null)
-        input.close();
+         "Error loading resource (wrong encoding?): " + resource, ex);
     }
-    return lines;
   }
 
   /*

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java Thu Jul 26 13:43:28 2012
@@ -55,10 +55,6 @@ public class TestMultiWordSynonyms exten
       this.text = text;
     }
 
-    public List<String> getLines(String resource) throws IOException {
-      return null;
-    }
-
     public <T> T newInstance(String cname, Class<T> expectedType, String... subpackages) {
       return null;
     }

Modified: lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java?rev=1365992&r1=1365991&r2=1365992&view=diff
==============================================================================
--- lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java (original)
+++ lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java Thu Jul 26 13:43:28 2012
@@ -32,10 +32,6 @@ class StringMockSolrResourceLoader imple
     this.text = text;
   }
 
-  public List<String> getLines(String resource) throws IOException {
-    return Arrays.asList(text.split("\n"));
-  }
-
   public <T> T newInstance(String cname, Class<T> expectedType, String... subpackages) {
     return null;
   }