You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2011/10/28 10:37:41 UTC

svn commit: r1190203 - in /lucene/dev/branches/branch_3x/solr: CHANGES.txt contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java

Author: dweiss
Date: Fri Oct 28 08:37:41 2011
New Revision: 1190203

URL: http://svn.apache.org/viewvc?rev=1190203&view=rev
Log:
SOLR-2862: CarrotClusteringEngine's resource locator should log the absolute location of lexical resources. Minor corrections in IResource/ IResourceLookup.

Modified:
    lucene/dev/branches/branch_3x/solr/CHANGES.txt
    lucene/dev/branches/branch_3x/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java

Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=1190203&r1=1190202&r2=1190203&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Fri Oct 28 08:37:41 2011
@@ -133,6 +133,9 @@ Bug Fixes
 * SOLR-2588: Moved VelocityResponseWriter back to contrib module in order to 
   remove it as a mandatory core dependency.  (Erik Hatcher)
 
+* SOLR-2862: Lexical resource locations are logged more explicitly when the Carrot2 clustering
+  extension is used. Fixed Solr's implementations of IResource and IResourceLookup. (Dawid Weiss)
+
 ==================  3.4.0  ==================
 
 Upgrading from Solr 3.3

Modified: lucene/dev/branches/branch_3x/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=1190203&r1=1190202&r2=1190203&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java Fri Oct 28 08:37:41 2011
@@ -17,8 +17,7 @@ package org.apache.solr.handler.clusteri
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.io.InputStream;
+import java.io.*;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -29,6 +28,7 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.commons.lang.StringUtils;
+import org.apache.commons.io.IOUtils;
 import org.apache.lucene.search.Query;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
@@ -66,13 +66,14 @@ import org.slf4j.LoggerFactory;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
+import com.google.common.io.Closeables;
 
 /**
  * Search results clustering engine based on Carrot2 clustering algorithms.
  * <p/>
  * Output from this class is subject to change.
  *
- * @link http://project.carrot2.org
+ * @see "http://project.carrot2.org"
  */
 public class CarrotClusteringEngine extends SearchClusteringEngine {
   private transient static Logger log = LoggerFactory
@@ -101,6 +102,90 @@ public class CarrotClusteringEngine exte
   private Controller controller = ControllerFactory.createPooling();
   private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
   
+  /**
+   * Carrot2 resource locator that reads lexical resources through Solr's resource loader,
+   * from the directory configured with {@link CarrotParams#LEXICAL_RESOURCES_DIR}.
+   */
+  private static class SolrResourceLocator implements IResourceLocator {
+    private final SolrResourceLoader resourceLoader;
+    private final String carrot2ResourcesDir;
+
+    public SolrResourceLocator(SolrCore core, SolrParams initParams) {
+      resourceLoader = core.getResourceLoader();
+      carrot2ResourcesDir = initParams.get(CarrotParams.LEXICAL_RESOURCES_DIR, CARROT_RESOURCES_PREFIX);
+    }
+
+    @Override
+    public IResource[] getAll(final String resource) {
+      final String resourceName = carrot2ResourcesDir + "/" + resource;
+      log.debug("Looking for Solr resource: " + resourceName);
+      // Buffer the content eagerly so that open() can return a fresh stream on every call.
+      InputStream resourceStream = null;
+      final byte [] asBytes;
+      try {
+        resourceStream = resourceLoader.openResource(resourceName);
+        asBytes = IOUtils.toByteArray(resourceStream);
+      } catch (RuntimeException e) {
+        // Deliberate fallback: an empty array lets the lookup move on to the next locator.
+        log.debug("Resource not found in Solr's config: " + resourceName
+            + ". Using the default " + resource + " from Carrot JAR.");
+        return new IResource[] {};
+      } catch (IOException e) {
+        log.warn("Could not read Solr resource " + resourceName, e);
+        return new IResource[] {};
+      } finally {
+        if (resourceStream != null) Closeables.closeQuietly(resourceStream);
+      }
+
+      log.info("Loaded Solr resource: " + resourceName);
+
+      final IResource foundResource = new IResource() {
+        @Override
+        public InputStream open() throws IOException {
+          return new ByteArrayInputStream(asBytes);
+        }
+
+        @Override
+        public int hashCode() {
+          // Identity semantics on purpose: Solr does not rely on de-duplication of resources.
+          return super.hashCode();
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+          // Identity semantics on purpose: Solr does not rely on de-duplication of resources.
+          return super.equals(obj);
+        }
+
+        @Override
+        public String toString() {
+          return "Solr config resource: " + resourceName;
+        }
+      };
+
+      return new IResource[] { foundResource };
+    }
+
+    @Override
+    public int hashCode() {
+      // Identity semantics on purpose: Solr does not rely on de-duplication of locators.
+      return super.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      // Identity semantics on purpose: Solr does not rely on de-duplication of locators.
+      return super.equals(obj);
+    }
+
+    @Override
+    public String toString() {
+      return "SolrResourceLocator, "
+          + "configDir=" + new File(resourceLoader.getConfigDir()).getAbsolutePath()
+          + ", Carrot2 relative lexicalResourcesDir=" + carrot2ResourcesDir;
+    }
+  }
+
   @Override
   @Deprecated
   public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
@@ -154,53 +239,26 @@ public class CarrotClusteringEngine exte
     // Customize the stemmer and tokenizer factories. The implementations we provide here
     // are included in the code base of Solr, so that it's possible to refactor
     // the Lucene APIs the factories rely on if needed.
-    // Additionally, we set a custom lexical resource factory for Carrot2 that 
-    // will use both Carrot2 default stop words as well as stop words from 
+    // Additionally, we set a custom lexical resource factory for Carrot2 that
+    // will use both Carrot2 default stop words as well as stop words from
     // the StopFilter defined on the field.
-    BasicPreprocessingPipelineDescriptor.attributeBuilder(initAttributes)
-        .stemmerFactory(LuceneCarrot2StemmerFactory.class)
-        .tokenizerFactory(LuceneCarrot2TokenizerFactory.class)
-        .lexicalDataFactory(SolrStopwordsCarrot2LexicalDataFactory.class);
-    
-    // Pass the schema to SolrStopwordsCarrot2LexicalDataFactory.
-    initAttributes.put("solrIndexSchema", core.getSchema());
+		BasicPreprocessingPipelineDescriptor.attributeBuilder(initAttributes)
+				.stemmerFactory(LuceneCarrot2StemmerFactory.class)
+				.tokenizerFactory(LuceneCarrot2TokenizerFactory.class)
+				.lexicalDataFactory(SolrStopwordsCarrot2LexicalDataFactory.class);
+
+		// Pass the schema to SolrStopwordsCarrot2LexicalDataFactory.
+		initAttributes.put("solrIndexSchema", core.getSchema());
 
     // Customize Carrot2's resource lookup to first look for resources
     // using Solr's resource loader. If that fails, try loading from the classpath.
-    DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes)
-        .resourceLookup(new ResourceLookup(new IResourceLocator() {
-          public IResource[] getAll(final String resource) {
-            final SolrResourceLoader resourceLoader = core.getResourceLoader();
-            final String carrot2ResourcesDir = initParams.get(
-                CarrotParams.LEXICAL_RESOURCES_DIR, CARROT_RESOURCES_PREFIX);
-            try {
-              log.debug("Looking for " + resource + " in "
-                  + carrot2ResourcesDir);
-              final InputStream resourceStream = resourceLoader
-                  .openResource(carrot2ResourcesDir + "/" + resource);
-              
-              log.info(resource + " loaded from " + carrot2ResourcesDir);
-              final IResource foundResource = new IResource() {
-                public InputStream open() throws IOException {
-                  return resourceStream;
-                }
-              };
-              return new IResource[] { foundResource };
-            } catch (RuntimeException e) {
-              // No way to distinguish if the resource was found but failed
-              // to load or wasn't found at all, so we simply fall back
-              // to Carrot2 defaults here by returning an empty locations array.
-              log.debug(resource + " not found in " + carrot2ResourcesDir
-                  + ". Using the default " + resource + " from Carrot JAR.");
-              return new IResource[] {};
-            }
-
-          }
-        },
-        
-        // Using the class loader directly because this time we want to omit the prefix 
+    DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes).resourceLookup(
+      new ResourceLookup(
+        // Solr-specific resource loading.
+        new SolrResourceLocator(core, initParams),
+        // Using the class loader directly because this time we want to omit the prefix
         new ClassLoaderLocator(core.getResourceLoader().getClassLoader())));
-    
+
     this.controller.init(initAttributes);
     this.idFieldName = core.getSchema().getUniqueKeyField().getName();