You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/21 20:58:44 UTC
svn commit: r1534320 [28/39] - in /lucene/dev/branches/lucene4956: ./
dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/lucene/expressions/
dev-tools/idea/solr/contrib/velocity/ dev-tools/maven/
dev-tools/maven/lucene/ dev-tools/maven/lucene/expressions/...
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java Mon Oct 21 18:58:24 2013
@@ -1,4 +1,5 @@
package org.apache.solr.handler.clustering;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,6 +17,15 @@ package org.apache.solr.handler.clusteri
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.commons.lang.StringUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
@@ -28,43 +38,105 @@ import org.apache.solr.handler.component
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.search.DocListAndSet;
+import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
+import com.google.common.collect.Maps;
/**
- * Provide a plugin for clustering results. Can either be for search results (i.e. via Carrot2) or for
- * clustering documents (i.e. via Mahout)
- * <p/>
- * This engine is experimental. Output from this engine is subject to change in future releases.
- *
- * <pre class="prettyprint" >
- * <searchComponent class="org.apache.solr.handler.clustering.ClusteringComponent" name="clustering">
- * <lst name="engine">
- * <str name="name">default</str>
- * <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
- * </lst>
- * </searchComponent></pre>
+ * Provides a plugin for performing cluster analysis. This can either be applied to
+ * search results (e.g., via <a href="http://project.carrot2.org">Carrot<sup>2</sup></a>) or for
+ * clustering documents (e.g., via <a href="http://mahout.apache.org/">Mahout</a>).
+ * <p>
+ * See Solr example for configuration examples.</p>
+ *
+ * @lucene.experimental
*/
public class ClusteringComponent extends SearchComponent implements SolrCoreAware {
private transient static Logger log = LoggerFactory.getLogger(ClusteringComponent.class);
- private Map<String, SearchClusteringEngine> searchClusteringEngines = new HashMap<String, SearchClusteringEngine>();
- private Map<String, DocumentClusteringEngine> documentClusteringEngines = new HashMap<String, DocumentClusteringEngine>();
/**
- * Base name for all spell checker query parameters. This name is also used to
+ * Base name for all component parameters. This name is also used to
* register this component with SearchHandler.
*/
public static final String COMPONENT_NAME = "clustering";
- private NamedList initParams;
+
+ /**
+ * Search clustering engines keyed by engine name, in declaration order.
+ */
+ private final LinkedHashMap<String, SearchClusteringEngine> searchClusteringEngines = Maps.newLinkedHashMap();
+
+ /**
+ * Document clustering engines keyed by engine name, in declaration order.
+ */
+ private final LinkedHashMap<String, DocumentClusteringEngine> documentClusteringEngines = Maps.newLinkedHashMap();
+
+ /**
+ * An unmodifiable view of {@link #searchClusteringEngines}.
+ */
+ private final Map<String, SearchClusteringEngine> searchClusteringEnginesView = Collections.unmodifiableMap(searchClusteringEngines);
+
+ /**
+ * Initialization parameters are temporarily saved here; the component
+ * is initialized in {@link #inform(SolrCore)} because we need to know
+ * the core's {@link SolrResourceLoader}.
+ *
+ * @see #init(NamedList)
+ */
+ private NamedList<Object> initParams;
+
+ @Override
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ public void init(NamedList args) {
+ this.initParams = args;
+ super.init(args);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void inform(SolrCore core) {
+ if (initParams != null) {
+ log.info("Initializing Clustering Engines");
+
+ // Our target list of engines, split into search-results and document clustering.
+ SolrResourceLoader loader = core.getResourceLoader();
+
+ for (Map.Entry<String,Object> entry : initParams) {
+ if ("engine".equals(entry.getKey())) {
+ NamedList<Object> engineInitParams = (NamedList<Object>) entry.getValue();
+
+ String engineClassName = StringUtils.defaultIfBlank(
+ (String) engineInitParams.get("classname"),
+ CarrotClusteringEngine.class.getName());
+
+ // Instantiate the clustering engine and split to appropriate map.
+ final ClusteringEngine engine = loader.newInstance(engineClassName, ClusteringEngine.class);
+ final String name = StringUtils.defaultIfBlank(engine.init(engineInitParams, core), "");
+ final ClusteringEngine previousEntry;
+ if (engine instanceof SearchClusteringEngine) {
+ previousEntry = searchClusteringEngines.put(name, (SearchClusteringEngine) engine);
+ } else if (engine instanceof DocumentClusteringEngine) {
+ previousEntry = documentClusteringEngines.put(name, (DocumentClusteringEngine) engine);
+ } else {
+ log.warn("Unknown type of a clustering engine for class: " + engineClassName);
+ continue;
+ }
+ if (previousEntry != null) {
+ log.warn("Duplicate clustering engine component named '" + name + "'.");
+ }
+ }
+ }
+
+ // Set up the default engine key for both types of engines.
+ setupDefaultEngine("search results clustering", searchClusteringEngines);
+ setupDefaultEngine("document clustering", documentClusteringEngines);
+
+ log.info("Finished Initializing Clustering Engines");
+ }
+ }
@Override
public void prepare(ResponseBuilder rb) throws IOException {
@@ -86,8 +158,9 @@ public class ClusteringComponent extends
SearchClusteringEngine engine = getSearchClusteringEngine(rb);
if (engine != null) {
DocListAndSet results = rb.getResults();
- Map<SolrDocument,Integer> docIds = new HashMap<SolrDocument, Integer>(results.docList.size());
- SolrDocumentList solrDocList = engine.getSolrDocumentList(results.docList, rb.req, docIds);
+ Map<SolrDocument,Integer> docIds = Maps.newHashMapWithExpectedSize(results.docList.size());
+ SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList(
+ results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds);
Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
rb.rsp.add("clusters", clusters);
} else {
@@ -99,9 +172,9 @@ public class ClusteringComponent extends
DocumentClusteringEngine engine = documentClusteringEngines.get(name);
if (engine != null) {
boolean useDocSet = params.getBool(ClusteringParams.USE_DOC_SET, false);
- NamedList nl = null;
+ NamedList<?> nl = null;
- //TODO: This likely needs to be made into a background task that runs in an executor
+ // TODO: This likely needs to be made into a background task that runs in an executor
if (useDocSet == true) {
nl = engine.cluster(rb.getResults().docSet, params);
} else {
@@ -113,7 +186,7 @@ public class ClusteringComponent extends
}
}
}
-
+
private SearchClusteringEngine getSearchClusteringEngine(ResponseBuilder rb){
return searchClusteringEngines.get(getClusteringEngineName(rb));
}
@@ -180,76 +253,13 @@ public class ClusteringComponent extends
}
}
- @Override
- @SuppressWarnings("unchecked")
- public void init(NamedList args) {
- super.init(args);
- this.initParams = args;
- }
-
- @Override
- public void inform(SolrCore core) {
- if (initParams != null) {
- log.info("Initializing Clustering Engines");
- boolean searchHasDefault = false;
- boolean documentHasDefault = false;
- for (int i = 0; i < initParams.size(); i++) {
- if (initParams.getName(i).equals("engine")) {
- NamedList engineNL = (NamedList) initParams.getVal(i);
- String className = (String) engineNL.get("classname");
- if (className == null) {
- className = CarrotClusteringEngine.class.getName();
- }
- SolrResourceLoader loader = core.getResourceLoader();
- ClusteringEngine clusterer = loader.newInstance(className, ClusteringEngine.class);
- if (clusterer != null) {
- String name = clusterer.init(engineNL, core);
- if (name != null) {
- boolean isDefault = name.equals(ClusteringEngine.DEFAULT_ENGINE_NAME);
- if (clusterer instanceof SearchClusteringEngine) {
- if (isDefault == true && searchHasDefault == false) {
- searchHasDefault = true;
- } else if (isDefault == true && searchHasDefault == true) {
- throw new RuntimeException("More than one engine is missing name: " + engineNL);
- }
- searchClusteringEngines.put(name, (SearchClusteringEngine) clusterer);
- } else if (clusterer instanceof DocumentClusteringEngine) {
- if (isDefault == true && documentHasDefault == false) {
- searchHasDefault = true;
- } else if (isDefault == true && documentHasDefault == true) {
- throw new RuntimeException("More than one engine is missing name: " + engineNL);
- }
- documentClusteringEngines.put(name, (DocumentClusteringEngine) clusterer);
- }
- } else {
- if (clusterer instanceof SearchClusteringEngine && searchHasDefault == false) {
- searchClusteringEngines.put(ClusteringEngine.DEFAULT_ENGINE_NAME, (SearchClusteringEngine) clusterer);
- searchHasDefault = true;
- } else if (clusterer instanceof DocumentClusteringEngine && documentHasDefault == false) {
- documentClusteringEngines.put(ClusteringEngine.DEFAULT_ENGINE_NAME, (DocumentClusteringEngine) clusterer);
- documentHasDefault = true;
- } else {
- throw new RuntimeException("More than one engine is missing name: " + engineNL);
- }
- }
- }
- }
- }
- log.info("Finished Initializing Clustering Engines");
- }
- }
-
- /*
- * @return Unmodifiable Map of the engines, key is the name from the config, value is the engine
- * */
- public Map<String, SearchClusteringEngine> getSearchClusteringEngines() {
- return Collections.unmodifiableMap(searchClusteringEngines);
+ /**
+ * @return An unmodifiable view of the search clustering engines, keyed by name. Exposed for tests.
+ */
+ Map<String, SearchClusteringEngine> getSearchClusteringEngines() {
+ return searchClusteringEnginesView;
}
- // ///////////////////////////////////////////
- // / SolrInfoMBean
- // //////////////////////////////////////////
-
@Override
public String getDescription() {
return "A Clustering component";
@@ -259,4 +269,23 @@ public class ClusteringComponent extends
public String getSource() {
return "$URL$";
}
+
+ /**
+ * Sets up the default clustering engine: unless one is already registered, the first declared engine becomes the default.
+ * @see "https://issues.apache.org/jira/browse/SOLR-5219"
+ */
+ private static <T extends ClusteringEngine> void setupDefaultEngine(String type, LinkedHashMap<String,T> map) {
+ // If there's already a default algorithm, leave it as is.
+ if (map.containsKey(ClusteringEngine.DEFAULT_ENGINE_NAME)) {
+ return;
+ }
+
+ // If there's no default algorithm, and there are any algorithms available,
+ // the first definition becomes the default algorithm.
+ if (!map.isEmpty()) {
+ Entry<String,T> first = map.entrySet().iterator().next();
+ map.put(ClusteringEngine.DEFAULT_ENGINE_NAME, first.getValue());
+ log.info("Default engine for " + type + ": " + first.getKey());
+ }
+ }
}
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java Mon Oct 21 18:58:24 2013
@@ -18,19 +18,18 @@ package org.apache.solr.handler.clusteri
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
-
/**
- *
- *
- **/
+ * A base class for {@link SearchClusteringEngine} and {@link DocumentClusteringEngine}.
+ * @lucene.experimental
+ */
public class ClusteringEngine {
- private String name;
public static final String ENGINE_NAME = "name";
public static final String DEFAULT_ENGINE_NAME = "default";
- public String init(NamedList config, SolrCore core) {
- name = (String) config.get(ENGINE_NAME);
+ private String name;
+ public String init(NamedList<?> config, SolrCore core) {
+ name = (String) config.get(ENGINE_NAME);
return name;
}
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringParams.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringParams.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringParams.java Mon Oct 21 18:58:24 2013
@@ -16,11 +16,9 @@ package org.apache.solr.handler.clusteri
* limitations under the License.
*/
-
/**
- *
- *
- **/
+ * @lucene.experimental
+ */
public interface ClusteringParams {
public static final String CLUSTERING_PREFIX = "clustering.";
@@ -30,8 +28,9 @@ public interface ClusteringParams {
public static final String USE_SEARCH_RESULTS = CLUSTERING_PREFIX + "results";
public static final String USE_COLLECTION = CLUSTERING_PREFIX + "collection";
+
/**
- * When document clustering, cluster on the Doc Set
+ * When clustering full documents, cluster on the Doc Set.
*/
public static final String USE_DOC_SET = CLUSTERING_PREFIX + "docs.useDocSet";
}
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java Mon Oct 21 18:58:24 2013
@@ -20,11 +20,9 @@ import org.apache.solr.common.util.Named
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.search.DocSet;
-
/**
- * Experimental. Subject to change before the next release.
- *
- **/
+ * @lucene.experimental
+ */
public abstract class DocumentClusteringEngine extends ClusteringEngine {
/**
@@ -34,7 +32,7 @@ public abstract class DocumentClustering
* @param solrParams The params controlling clustering
* @return The clustering results
*/
- public abstract NamedList cluster(SolrParams solrParams);
+ public abstract NamedList<?> cluster(SolrParams solrParams);
/**
* Experimental. Subject to change before the next release
@@ -44,7 +42,7 @@ public abstract class DocumentClustering
* @param solrParams The params controlling the clustering
* @return The results.
*/
- public abstract NamedList cluster(DocSet docs, SolrParams solrParams);
+ public abstract NamedList<?> cluster(DocSet docs, SolrParams solrParams);
}
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java Mon Oct 21 18:58:24 2013
@@ -1,4 +1,5 @@
package org.apache.solr.handler.clustering;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,7 +17,6 @@ package org.apache.solr.handler.clusteri
* limitations under the License.
*/
-import java.io.IOException;
import java.util.Map;
import java.util.Set;
@@ -24,20 +24,19 @@ import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.search.DocList;
-import org.apache.solr.util.SolrPluginUtils;
-
+import org.apache.solr.response.SolrQueryResponse;
/**
- *
- *
- **/
+ * Base class for clustering engines performing cluster analysis on search
+ * results.
+ *
+ * @lucene.experimental
+ */
public abstract class SearchClusteringEngine extends ClusteringEngine {
-
- @Deprecated
- public abstract Object cluster(Query query, DocList docList, SolrQueryRequest sreq);
-
- // TODO: need DocList, too?
+ /**
+ * Do the clustering, return a clusters structure to be appended to
+ * {@link SolrQueryResponse}.
+ */
public abstract Object cluster(Query query, SolrDocumentList solrDocumentList,
Map<SolrDocument,Integer> docIds, SolrQueryRequest sreq);
@@ -45,15 +44,10 @@ public abstract class SearchClusteringEn
* Returns the set of field names to load.
* Concrete classes can override this method if needed.
* Default implementation returns null, that is, all stored fields are loaded.
- * @return set of field names to load
+ *
+ * @return The set of field names to load.
*/
protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
return null;
}
-
- public SolrDocumentList getSolrDocumentList(DocList docList, SolrQueryRequest sreq,
- Map<SolrDocument, Integer> docIds) throws IOException{
- return SolrPluginUtils.docListToSolrDocumentList(
- docList, sreq.getSearcher(), getFieldsToLoad(sreq), docIds);
- }
}
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java Mon Oct 21 18:58:24 2013
@@ -17,10 +17,7 @@ package org.apache.solr.handler.clusteri
* limitations under the License.
*/
-import java.io.ByteArrayInputStream;
-import java.io.File;
import java.io.IOException;
-import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -32,7 +29,6 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
-import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ObjectUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.search.Query;
@@ -45,7 +41,7 @@ import org.apache.solr.common.params.Sol
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
-import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.handler.clustering.ClusteringEngine;
import org.apache.solr.handler.clustering.SearchClusteringEngine;
import org.apache.solr.handler.component.HighlightComponent;
import org.apache.solr.highlight.SolrHighlighter;
@@ -55,7 +51,6 @@ import org.apache.solr.schema.SchemaFiel
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSlice;
import org.apache.solr.search.SolrIndexSearcher;
-import org.apache.solr.util.SolrPluginUtils;
import org.carrot2.core.Cluster;
import org.carrot2.core.Controller;
import org.carrot2.core.ControllerFactory;
@@ -66,34 +61,33 @@ import org.carrot2.core.attribute.Attrib
import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor.AttributeBuilder;
+import org.carrot2.util.attribute.AttributeValueSet;
+import org.carrot2.util.attribute.AttributeValueSets;
import org.carrot2.util.resource.ClassLoaderLocator;
import org.carrot2.util.resource.IResource;
-import org.carrot2.util.resource.IResourceLocator;
import org.carrot2.util.resource.ResourceLookup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.google.common.base.Objects;
+import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
-import com.google.common.io.Closeables;
-import com.google.common.io.Closer;
/**
* Search results clustering engine based on Carrot2 clustering algorithms.
- * <p/>
- * Output from this class is subject to change.
*
* @see "http://project.carrot2.org"
+ * @lucene.experimental
*/
public class CarrotClusteringEngine extends SearchClusteringEngine {
- private transient static Logger log = LoggerFactory
- .getLogger(CarrotClusteringEngine.class);
+ transient static Logger log = LoggerFactory.getLogger(CarrotClusteringEngine.class);
/**
* The subdirectory in Solr config dir to read customized Carrot2 resources from.
*/
- private static final String CARROT_RESOURCES_PREFIX = "clustering/carrot2";
+ static final String CARROT_RESOURCES_PREFIX = "clustering/carrot2";
/**
* Name of Carrot2 document's field containing Solr document's identifier.
@@ -111,166 +105,69 @@ public class CarrotClusteringEngine exte
* Carrot2 controller that manages instances of clustering algorithms
*/
private Controller controller = ControllerFactory.createPooling();
+
+ /**
+ * {@link IClusteringAlgorithm} class used for actual clustering.
+ */
private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
/** Solr core we're bound to. */
private SolrCore core;
- private static class SolrResourceLocator implements IResourceLocator {
- private final SolrResourceLoader resourceLoader;
- private final String carrot2ResourcesDir;
-
- public SolrResourceLocator(SolrCore core, SolrParams initParams) {
- resourceLoader = core.getResourceLoader();
- carrot2ResourcesDir = initParams.get(
- CarrotParams.LEXICAL_RESOURCES_DIR, CARROT_RESOURCES_PREFIX);
- }
-
- @Override
- public IResource[] getAll(final String resource) {
- final String resourceName = carrot2ResourcesDir + "/" + resource;
- log.debug("Looking for Solr resource: " + resourceName);
-
- InputStream resourceStream = null;
- final byte [] asBytes;
- try {
- resourceStream = resourceLoader.openResource(resourceName);
- asBytes = IOUtils.toByteArray(resourceStream);
- } catch (IOException e) {
- log.debug("Resource not found in Solr's config: " + resourceName
- + ". Using the default " + resource + " from Carrot JAR.");
- return new IResource[] {};
- } finally {
- if (resourceStream != null) {
- try {
- resourceStream.close();
- } catch (IOException e) {
- // ignore.
- }
- }
- }
+ @Override
+ @SuppressWarnings("rawtypes")
+ public String init(NamedList config, final SolrCore core) {
+ this.core = core;
- log.info("Loaded Solr resource: " + resourceName);
+ String result = super.init(config, core);
+ final SolrParams initParams = SolrParams.toSolrParams(config);
- final IResource foundResource = new IResource() {
- @Override
- public InputStream open() {
- return new ByteArrayInputStream(asBytes);
- }
+ // Initialization attributes for Carrot2 controller.
+ HashMap<String, Object> initAttributes = new HashMap<String, Object>();
- @Override
- public int hashCode() {
- // In case multiple resources are found they will be deduped, but we don't use it in Solr,
- // so simply rely on instance equivalence.
- return super.hashCode();
- }
-
- @Override
- public boolean equals(Object obj) {
- // In case multiple resources are found they will be deduped, but we don't use it in Solr,
- // so simply rely on instance equivalence.
- return super.equals(obj);
+ // Customize Carrot2's resource lookup to first look for resources
+ // using Solr's resource loader. If that fails, try loading from the classpath.
+ ResourceLookup resourceLookup = new ResourceLookup(
+ // Solr-specific resource loading.
+ new SolrResourceLocator(core, initParams),
+ // Using the class loader directly because this time we want to omit the prefix
+ new ClassLoaderLocator(core.getResourceLoader().getClassLoader()));
+
+ DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes)
+ .resourceLookup(resourceLookup);
+
+ // Load Carrot2-Workbench exported attribute XMLs based on the 'name' attribute
+ // of this component. This by-name convention lookup is used to simplify configuring algorithms.
+ String componentName = initParams.get(ClusteringEngine.ENGINE_NAME);
+ log.info("Initializing Clustering Engine '" + Objects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
+
+ if (!Strings.isNullOrEmpty(componentName)) {
+ IResource[] attributeXmls = resourceLookup.getAll(componentName + "-attributes.xml");
+ if (attributeXmls.length > 0) {
+ if (attributeXmls.length > 1) {
+ log.warn("More than one attribute file found, first one will be used: "
+ + Arrays.toString(attributeXmls));
}
- @Override
- public String toString() {
- return "Solr config resource: " + resourceName;
+ Thread ct = Thread.currentThread();
+ ClassLoader prev = ct.getContextClassLoader();
+ try {
+ ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
+
+ AttributeValueSets avs = AttributeValueSets.deserialize(attributeXmls[0].open());
+ AttributeValueSet defaultSet = avs.getDefaultAttributeValueSet();
+ initAttributes.putAll(defaultSet.getAttributeValues());
+ } catch (Exception e) {
+ throw new SolrException(ErrorCode.SERVER_ERROR,
+ "Could not read attributes XML for clustering component: "
+ + componentName, e);
+ } finally {
+ ct.setContextClassLoader(prev);
}
- };
-
- return new IResource[] { foundResource };
- }
-
- @Override
- public int hashCode() {
- // In case multiple locations are used locators will be deduped, but we don't use it in Solr,
- // so simply rely on instance equivalence.
- return super.hashCode();
- }
-
- @Override
- public boolean equals(Object obj) {
- // In case multiple locations are used locators will be deduped, but we don't use it in Solr,
- // so simply rely on instance equivalence.
- return super.equals(obj);
- }
-
- @Override
- public String toString() {
- String configDir = "";
- try {
- configDir = "configDir=" + new File(resourceLoader.getConfigDir()).getAbsolutePath() + ", ";
- } catch (Exception ignored) {
- // If we get the exception, the resource loader implementation
- // probably does not support getConfigDir(). Not a big problem.
- }
-
- return "SolrResourceLocator, " + configDir
- + "Carrot2 relative lexicalResourcesDir=" + carrot2ResourcesDir;
- }
- }
-
- @Override
- @Deprecated
- public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
- SolrIndexSearcher searcher = sreq.getSearcher();
- SolrDocumentList solrDocList;
- try {
- Map<SolrDocument,Integer> docIds = new HashMap<SolrDocument, Integer>(docList.size());
- solrDocList = SolrPluginUtils.docListToSolrDocumentList( docList, searcher, getFieldsToLoad(sreq), docIds );
- return cluster(query, solrDocList, docIds, sreq);
- } catch (IOException e) {
- throw new SolrException(ErrorCode.SERVER_ERROR, e);
- }
- }
-
- @Override
- public Object cluster(Query query, SolrDocumentList solrDocList,
- Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
- try {
- // Prepare attributes for Carrot2 clustering call
- Map<String, Object> attributes = new HashMap<String, Object>();
- List<Document> documents = getDocuments(solrDocList, docIds, query, sreq);
- attributes.put(AttributeNames.DOCUMENTS, documents);
- attributes.put(AttributeNames.QUERY, query.toString());
-
- // Pass the fields on which clustering runs to the
- // SolrStopwordsCarrot2LexicalDataFactory
- attributes.put("solrFieldNames", getFieldsForClustering(sreq));
-
- // Pass extra overriding attributes from the request, if any
- extractCarrotAttributes(sreq.getParams(), attributes);
-
- // Perform clustering and convert to named list
- // Carrot2 uses current thread's context class loader to get
- // certain classes (e.g. custom tokenizer/stemmer) at runtime.
- // To make sure classes from contrib JARs are available,
- // we swap the context class loader for the time of clustering.
- Thread ct = Thread.currentThread();
- ClassLoader prev = ct.getContextClassLoader();
- try {
- ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
- return clustersToNamedList(controller.process(attributes,
- clusteringAlgorithmClass).getClusters(), sreq.getParams());
- } finally {
- ct.setContextClassLoader(prev);
}
- } catch (Exception e) {
- log.error("Carrot2 clustering failed", e);
- throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
}
- }
- @Override
- @SuppressWarnings({ "unchecked", "rawtypes" })
- public String init(NamedList config, final SolrCore core) {
- this.core = core;
-
- String result = super.init(config, core);
- final SolrParams initParams = SolrParams.toSolrParams(config);
-
- // Initialize Carrot2 controller. Pass initialization attributes, if any.
- HashMap<String, Object> initAttributes = new HashMap<String, Object>();
+ // Extract solrconfig attributes, they take precedence.
extractCarrotAttributes(initParams, initAttributes);
// Customize the stemmer and tokenizer factories. The implementations we provide here
@@ -291,15 +188,6 @@ public class CarrotClusteringEngine exte
// Pass the schema (via the core) to SolrStopwordsCarrot2LexicalDataFactory.
initAttributes.put("solrCore", core);
- // Customize Carrot2's resource lookup to first look for resources
- // using Solr's resource loader. If that fails, try loading from the classpath.
- DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes).resourceLookup(
- new ResourceLookup(
- // Solr-specific resource loading.
- new SolrResourceLocator(core, initParams),
- // Using the class loader directly because this time we want to omit the prefix
- new ClassLoaderLocator(core.getResourceLoader().getClassLoader())));
-
// Carrot2 uses current thread's context class loader to get
// certain classes (e.g. custom tokenizer/stemmer) at initialization time.
// To make sure classes from contrib JARs are available,
@@ -322,11 +210,50 @@ public class CarrotClusteringEngine exte
// Make sure the requested Carrot2 clustering algorithm class is available
String carrotAlgorithmClassName = initParams.get(CarrotParams.ALGORITHM);
- this.clusteringAlgorithmClass = core.getResourceLoader().findClass(carrotAlgorithmClassName, IClusteringAlgorithm.class);
+ this.clusteringAlgorithmClass = core.getResourceLoader().findClass(
+ carrotAlgorithmClassName, IClusteringAlgorithm.class);
+
return result;
}
@Override
+ public Object cluster(Query query, SolrDocumentList solrDocList,
+ Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
+ try {
+ // Prepare attributes for Carrot2 clustering call
+ Map<String, Object> attributes = new HashMap<String, Object>();
+ List<Document> documents = getDocuments(solrDocList, docIds, query, sreq);
+ attributes.put(AttributeNames.DOCUMENTS, documents);
+ attributes.put(AttributeNames.QUERY, query.toString());
+
+ // Pass the fields on which clustering runs.
+ attributes.put("solrFieldNames", getFieldsForClustering(sreq));
+
+ // Pass extra overriding attributes from the request, if any
+ extractCarrotAttributes(sreq.getParams(), attributes);
+
+ // Perform clustering and convert to an output structure of clusters.
+ //
+ // Carrot2 uses current thread's context class loader to get
+ // certain classes (e.g. custom tokenizer/stemmer) at runtime.
+ // To make sure classes from contrib JARs are available,
+ // we swap the context class loader for the time of clustering.
+ Thread ct = Thread.currentThread();
+ ClassLoader prev = ct.getContextClassLoader();
+ try {
+ ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
+ return clustersToNamedList(controller.process(attributes,
+ clusteringAlgorithmClass).getClusters(), sreq.getParams());
+ } finally {
+ ct.setContextClassLoader(prev);
+ }
+ } catch (Exception e) {
+ log.error("Carrot2 clustering failed", e);
+ throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
+ }
+ }
+
+ @Override
protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
SolrParams solrParams = sreq.getParams();
@@ -383,8 +310,7 @@ public class CarrotClusteringEngine exte
// Parse language code map string into a map
Map<String, String> languageCodeMap = Maps.newHashMap();
if (StringUtils.isNotBlank(languageField)) {
- for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "")
- .split("[, ]")) {
+ for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) {
final String[] split = pair.split(":");
if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) {
languageCodeMap.put(split[0], split[1]);
@@ -440,8 +366,9 @@ public class CarrotClusteringEngine exte
docsHolder[0] = docIds.get(sdoc).intValue();
DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
NamedList<Object> highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
- if (highlights != null && highlights.size() == 1) {//should only be one value given our setup
- //should only be one document
+ if (highlights != null && highlights.size() == 1) {
+ // should only be one value given our setup
+ // should only be one document
@SuppressWarnings("unchecked")
NamedList<String []> tmp = (NamedList<String[]>) highlights.getVal(0);
@@ -517,6 +444,13 @@ public class CarrotClusteringEngine exte
}
/**
+ * Expose clustering algorithm class for tests.
+ */
+ Class<? extends IClusteringAlgorithm> getClusteringAlgorithmClass() {
+ return clusteringAlgorithmClass;
+ }
+
+ /**
* Prepares a map of Solr field names (keys) to the corresponding Carrot2
* custom field names.
*/
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java Mon Oct 21 18:58:24 2013
@@ -24,6 +24,7 @@ import com.google.common.collect.Immutab
/**
* Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration).
+ * @lucene.experimental
*/
public final class CarrotParams {
@@ -43,9 +44,21 @@ public final class CarrotParams {
public static String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
public static String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
- public static String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
+
public static String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap";
+ /**
+ * Use {@link #RESOURCES_DIR}.
+ */
+ @Deprecated
+ public static String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
+
+ /**
+ * A replacement property pointing to Carrot<sup>2</sup> resources
+ * (a more generic version of the deprecated {@link #LEXICAL_RESOURCES_DIR}).
+ */
+ public static String RESOURCES_DIR = CARROT_PREFIX + "resourcesDir";
+
static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
ALGORITHM,
@@ -62,6 +75,7 @@ public final class CarrotParams {
NUM_DESCRIPTIONS,
OUTPUT_SUB_CLUSTERS,
LEXICAL_RESOURCES_DIR,
+ RESOURCES_DIR,
LANGUAGE_CODE_MAP);
/** No instances. */
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2StemmerFactory.java Mon Oct 21 18:58:24 2013
@@ -48,6 +48,8 @@ import org.tartarus.snowball.ext.Turkish
* An implementation of Carrot2's {@link IStemmerFactory} based on Lucene's
* APIs. Should the relevant Lucene APIs need to change, the changes can be made
* in this class.
+ *
+ * @lucene.experimental
*/
public class LuceneCarrot2StemmerFactory implements IStemmerFactory {
final static Logger logger = org.slf4j.LoggerFactory
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java Mon Oct 21 18:58:24 2013
@@ -38,6 +38,8 @@ import org.slf4j.Logger;
* Smart Chinese tokenizer. If Smart Chinese tokenizer is not available in
* classpath at runtime, the default Carrot2's tokenizer is used. Should the
* Lucene APIs need to change, the changes can be made in this class.
+ *
+ * @lucene.experimental
*/
public class LuceneCarrot2TokenizerFactory implements ITokenizerFactory {
final static Logger logger = org.slf4j.LoggerFactory
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java Mon Oct 21 18:58:24 2013
@@ -50,10 +50,11 @@ import com.google.common.collect.Multima
* stop words removal. In other words, if something is a stop word during
* indexing, then it should also be a stop word during clustering, but not the
* other way round.
+ *
+ * @lucene.experimental
*/
@Bindable
-public class SolrStopwordsCarrot2LexicalDataFactory implements
- ILexicalDataFactory {
+public class SolrStopwordsCarrot2LexicalDataFactory implements ILexicalDataFactory {
final static Logger logger = org.slf4j.LoggerFactory
.getLogger(SolrStopwordsCarrot2LexicalDataFactory.class);
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/overview.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/overview.html?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/overview.html (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/java/overview.html Mon Oct 21 18:58:24 2013
@@ -16,6 +16,6 @@
-->
<html>
<body>
-Apache Solr Search Server: Clustering contrib
+Apache Solr Search Server: text clustering contrib
</body>
</html>
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml Mon Oct 21 18:58:24 2013
@@ -328,6 +328,12 @@
<str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str>
</lst>
<lst name="engine">
+ <str name="name">mock-external-attrs</str>
+ <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str>
+ <!-- takes precedence over external XML -->
+ <int name="MockClusteringAlgorithm.labels">4</int>
+ </lst>
+ <lst name="engine">
<str name="name">echo</str>
<str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.EchoClusteringAlgorithm</str>
</lst>
@@ -338,6 +344,11 @@
<lst name="engine">
<str name="name">lexical-resource-check-custom-resource-dir</str>
<str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.LexicalResourcesCheckClusteringAlgorithm</str>
+ <str name="carrot.resourcesDir">clustering/custom</str>
+ </lst>
+ <lst name="engine">
+ <str name="name">lexical-resource-check-custom-resource-dir-deprecated</str>
+ <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.LexicalResourcesCheckClusteringAlgorithm</str>
<str name="carrot.lexicalResourcesDir">clustering/custom</str>
</lst>
<lst name="engine">
@@ -362,7 +373,47 @@
</searchComponent>
+ <searchComponent class="org.apache.solr.handler.clustering.ClusteringComponent" name="clustering-name-default">
+ <lst name="engine">
+ <str name="name">stc</str>
+ <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
+ </lst>
+ <lst name="engine">
+ <str name="name">default</str>
+ <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
+ </lst>
+ <lst name="engine">
+ <str name="name">mock</str>
+ <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str>
+ </lst>
+ </searchComponent>
+
+ <searchComponent class="org.apache.solr.handler.clustering.ClusteringComponent" name="clustering-name-decl-order">
+ <lst name="engine">
+ <str name="name">lingo</str>
+ <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
+ </lst>
+ <lst name="engine">
+ <str name="name">stc</str>
+ <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
+ </lst>
+ <lst name="engine">
+ <str name="name">mock</str>
+ <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str>
+ </lst>
+ </searchComponent>
+ <searchComponent class="org.apache.solr.handler.clustering.ClusteringComponent" name="clustering-name-dups">
+ <lst name="engine">
+ <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
+ </lst>
+ <lst name="engine">
+ <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
+ </lst>
+ <lst name="engine">
+ <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str>
+ </lst>
+ </searchComponent>
<!-- Update request handler.
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/AbstractClusteringTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/AbstractClusteringTestCase.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/AbstractClusteringTestCase.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/AbstractClusteringTestCase.java Mon Oct 21 18:58:24 2013
@@ -16,6 +16,8 @@ package org.apache.solr.handler.clusteri
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrInputDocument;
import org.junit.BeforeClass;
@@ -82,10 +84,16 @@ public abstract class AbstractClustering
docWithCustomFields.addField("heading", "first");
docWithCustomFields.addField("heading", "second");
assertNull(h.validateUpdate(adoc(docWithCustomFields)));
-
assertNull(h.validateUpdate(commit()));
}
+ /**
+ * Expose package-scope methods from {@link ClusteringComponent} to tests.
+ */
+ protected final Map<String,SearchClusteringEngine> getSearchClusteringEngines(ClusteringComponent comp) {
+ return comp.getSearchClusteringEngines();
+ }
+
final static String[][] DOCUMENTS = new String[][]{
{"http://en.wikipedia.org/wiki/Data_mining",
"Data Mining - Wikipedia",
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java Mon Oct 21 18:58:24 2013
@@ -55,7 +55,7 @@ public class ClusteringComponentTest ext
rsp.add("responseHeader", new SimpleOrderedMap<Object>());
SolrQueryRequest req = new LocalSolrQueryRequest(core, params);
handler.handleRequest(req, rsp);
- NamedList values = rsp.getValues();
+ NamedList<?> values = rsp.getValues();
Object clusters = values.get("clusters");
//System.out.println("Clusters: " + clusters);
assertTrue("clusters is null and it shouldn't be", clusters != null);
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java Mon Oct 21 18:58:24 2013
@@ -26,14 +26,12 @@ import org.apache.solr.search.DocSet;
**/
public class MockDocumentClusteringEngine extends DocumentClusteringEngine {
@Override
- public NamedList cluster(DocSet docs, SolrParams solrParams) {
- NamedList result = new NamedList();
- return result;
+ public NamedList<?> cluster(DocSet docs, SolrParams solrParams) {
+ return new NamedList<Object>();
}
@Override
- public NamedList cluster(SolrParams solrParams) {
- NamedList result = new NamedList();
- return result;
+ public NamedList<?> cluster(SolrParams solrParams) {
+ return new NamedList<Object>();
}
}
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java Mon Oct 21 18:58:24 2013
@@ -34,16 +34,20 @@ import org.apache.solr.common.params.Sol
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.clustering.AbstractClusteringTestCase;
import org.apache.solr.handler.clustering.ClusteringComponent;
+import org.apache.solr.handler.clustering.ClusteringEngine;
+import org.apache.solr.handler.clustering.SearchClusteringEngine;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.SolrPluginUtils;
+import org.carrot2.clustering.lingo.LingoClusteringAlgorithm;
import org.carrot2.core.LanguageCode;
import org.carrot2.util.attribute.AttributeUtils;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
/**
*
@@ -122,7 +126,14 @@ public class CarrotClusteringEngineTest
@Test
public void testWithoutSubclusters() throws Exception {
checkClusters(checkEngine(getClusteringEngine("mock"), AbstractClusteringTestCase.numberOfDocs),
- 1, 1, 0);
+ 1, 1, 0);
+ }
+
+ @Test
+ public void testExternalXmlAttributesFile() throws Exception {
+ checkClusters(
+ checkEngine(getClusteringEngine("mock-external-attrs"), 13),
+ 1, 4, 0);
}
@Test
@@ -189,6 +200,12 @@ public class CarrotClusteringEngineTest
"online,customsolrstopwordcustomdir,customsolrstoplabelcustomdir");
}
+ @Test
+ public void testLexicalResourcesFromSolrConfigCustomDirDeprecated() throws Exception {
+ checkLexicalResourcesFromSolrConfig("lexical-resource-check-custom-resource-dir-deprecated",
+ "online,customsolrstopwordcustomdir,customsolrstoplabelcustomdir");
+ }
+
private void checkLexicalResourcesFromSolrConfig(String engineName, String wordsToCheck)
throws IOException {
ModifiableSolrParams params = new ModifiableSolrParams();
@@ -380,12 +397,48 @@ public class CarrotClusteringEngineTest
assertTrue("First token", labels.get(0).contains("titletitle"));
}
+ @Test
+ public void testDefaultEngineOrder() throws Exception {
+ ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-default");
+ Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
+ assertEquals(
+ Lists.newArrayList("stc", "default", "mock"),
+ Lists.newArrayList(engines.keySet()));
+ assertEquals(
+ LingoClusteringAlgorithm.class,
+ ((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
+ }
+
+ @Test
+ public void testDeclarationEngineOrder() throws Exception {
+ ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-decl-order");
+ Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
+ assertEquals(
+ Lists.newArrayList("lingo", "stc", "mock", "default"),
+ Lists.newArrayList(engines.keySet()));
+ assertEquals(
+ LingoClusteringAlgorithm.class,
+ ((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
+ }
+
+ @Test
+ public void testDeclarationNameDuplicates() throws Exception {
+ ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-dups");
+ Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
+ assertEquals(
+ Lists.newArrayList("", "default"),
+ Lists.newArrayList(engines.keySet()));
+ assertEquals(
+ MockClusteringAlgorithm.class,
+ ((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
+ }
+
private CarrotClusteringEngine getClusteringEngine(String engineName) {
ClusteringComponent comp = (ClusteringComponent) h.getCore()
.getSearchComponent("clustering");
assertNotNull("clustering component should not be null", comp);
- CarrotClusteringEngine engine = (CarrotClusteringEngine) comp
- .getSearchClusteringEngines().get(engineName);
+ CarrotClusteringEngine engine =
+ (CarrotClusteringEngine) getSearchClusteringEngines(comp).get(engineName);
assertNotNull("clustering engine for name: " + engineName
+ " should not be null", engine);
return engine;
Modified: lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java Mon Oct 21 18:58:24 2013
@@ -52,6 +52,12 @@ public class MockClusteringAlgorithm ext
@Input
@Processing
@Attribute
+ @IntRange(min = 0)
+ private int maxClusters = 0;
+
+ @Input
+ @Processing
+ @Attribute
private int otherTopicsModulo = 0;
@Override
@@ -61,6 +67,10 @@ public class MockClusteringAlgorithm ext
return;
}
+ if (maxClusters > 0) {
+ documents = documents.subList(0, maxClusters);
+ }
+
int documentIndex = 1;
for (Document document : documents) {
StringBuilder label = new StringBuilder("Cluster " + documentIndex);
Modified: lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/ivy.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/ivy.xml (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/ivy.xml Mon Oct 21 18:58:24 2013
@@ -23,10 +23,10 @@
<conf name="test" transitive="false"/>
</configurations>
<dependencies>
- <dependency org="javax.activation" name="activation" rev="1.1" conf="compile->*"/>
- <dependency org="javax.mail" name="mail" rev="1.4.1" conf="compile->*"/>
+ <dependency org="javax.activation" name="activation" rev="${/javax.activation/activation}" conf="compile->*"/>
+ <dependency org="javax.mail" name="mail" rev="${/javax.mail/mail}" conf="compile->*"/>
- <dependency org="org.easymock" name="easymock" rev="3.0" conf="test->*"/>
+ <dependency org="org.easymock" name="easymock" rev="${/org.easymock/easymock}" conf="test->*"/>
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
Modified: lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SolrEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SolrEntityProcessor.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SolrEntityProcessor.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SolrEntityProcessor.java Mon Oct 21 18:58:24 2013
@@ -71,8 +71,6 @@ public class SolrEntityProcessor extends
private String[] fields;
private String requestHandler;// 'qt' param
private int timeout = TIMEOUT_SECS;
-
- private boolean initDone = false;
/**
* Factory method that returns a {@link HttpClient} instance used for interfacing with a source Solr service.
@@ -96,13 +94,16 @@ public class SolrEntityProcessor extends
"SolrEntityProcessor: parameter 'url' is required");
}
+ // TODO: we should close this client!
HttpClient client = getHttpClient();
URL url = new URL(serverPath);
// (wt="javabin|xml") default is javabin
if ("xml".equals(context.getResolvedEntityAttribute(CommonParams.WT))) {
+ // TODO: it doesn't matter for this impl when passing a client currently, but we should shutdown this!
solrServer = new HttpSolrServer(url.toExternalForm(), client, new XMLResponseParser());
LOG.info("using XMLResponseParser");
} else {
+ // TODO: it doesn't matter for this impl when passing a client currently, but we should shutdown this!
solrServer = new HttpSolrServer(url.toExternalForm(), client);
LOG.info("using BinaryResponseParser");
}
@@ -122,28 +123,22 @@ public class SolrEntityProcessor extends
* external synchronization.
*/
private void buildIterator() {
- if (rowIterator == null) {
- // We could use an AtomicBoolean but there's no need since this method
- // would require anyway external synchronization
- if (!initDone) {
- initDone = true;
- SolrDocumentList solrDocumentList = doQuery(0);
+ if (rowIterator != null) {
+ SolrDocumentListIterator documentListIterator = (SolrDocumentListIterator) rowIterator;
+ if (!documentListIterator.hasNext() && documentListIterator.hasMoreRows()) {
+ SolrDocumentList solrDocumentList = doQuery(documentListIterator
+ .getStart() + documentListIterator.getSize());
if (solrDocumentList != null) {
rowIterator = new SolrDocumentListIterator(solrDocumentList);
}
}
- return;
- }
-
- SolrDocumentListIterator documentListIterator = (SolrDocumentListIterator) rowIterator;
- if (!documentListIterator.hasNext() && documentListIterator.hasMoreRows()) {
- SolrDocumentList solrDocumentList = doQuery(documentListIterator
- .getStart() + documentListIterator.getSize());
+ } else {
+ SolrDocumentList solrDocumentList = doQuery(0);
if (solrDocumentList != null) {
rowIterator = new SolrDocumentListIterator(solrDocumentList);
}
+ return;
}
-
}
protected SolrDocumentList doQuery(int start) {
Modified: lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java Mon Oct 21 18:58:24 2013
@@ -41,6 +41,7 @@ import java.util.List;
*/
public class TestContentStreamDataSource extends AbstractDataImportHandlerTestCase {
private static final String CONF_DIR = "dih/solr/collection1/conf/";
+ private static final String ROOT_DIR = "dih/solr/";
SolrInstance instance = null;
JettySolrRunner jetty;
@@ -144,6 +145,11 @@ public class TestContentStreamDataSource
return CONF_DIR + "contentstream-solrconfig.xml";
}
+ public String getSolrXmlFile() {
+ return ROOT_DIR + "solr.xml";
+ }
+
+
public void setUp() throws Exception {
File home = new File(TEMP_DIR,
@@ -158,6 +164,7 @@ public class TestContentStreamDataSource
dataDir.mkdirs();
confDir.mkdirs();
+ FileUtils.copyFile(getFile(getSolrXmlFile()), new File(homeDir, "solr.xml"));
File f = new File(confDir, "solrconfig.xml");
FileUtils.copyFile(getFile(getSolrConfigFile()), f);
f = new File(confDir, "schema.xml");
Modified: lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorEndToEnd.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorEndToEnd.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorEndToEnd.java (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorEndToEnd.java Mon Oct 21 18:58:24 2013
@@ -49,7 +49,8 @@ public class TestSolrEntityProcessorEndT
private static final String SOLR_CONFIG = "dataimport-solrconfig.xml";
private static final String SOLR_SCHEMA = "dataimport-schema.xml";
private static final String SOURCE_CONF_DIR = "dih" + File.separator + "solr" + File.separator + "collection1" + File.separator + "conf" + File.separator;
-
+ private static final String ROOT_DIR = "dih" + File.separator + "solr" + File.separator;
+
private static final String DEAD_SOLR_SERVER = "http://[ff01::114]:33332/solr";
private static final List<Map<String,Object>> DB_DOCS = new ArrayList<Map<String,Object>>();
@@ -61,7 +62,7 @@ public class TestSolrEntityProcessorEndT
dbDoc.put("dbid_s", "1");
dbDoc.put("dbdesc_s", "DbDescription");
DB_DOCS.add(dbDoc);
-
+
Map<String,Object> solrDoc = new HashMap<String,Object>();
solrDoc.put("id", "1");
solrDoc.put("desc", "SolrDescription");
@@ -201,8 +202,19 @@ public class TestSolrEntityProcessorEndT
assertQ(req("*:*"), "//result[@numFound='0']");
try {
- MockDataSource.setIterator("select * from x", DB_DOCS.iterator());
- addDocumentsToSolr(SOLR_DOCS);
+ List<Map<String,Object>> DOCS = new ArrayList<Map<String,Object>>(DB_DOCS);
+ Map<String, Object> doc = new HashMap<String, Object>();
+ doc.put("dbid_s", "2");
+ doc.put("dbdesc_s", "DbDescription2");
+ DOCS.add(doc);
+ MockDataSource.setIterator("select * from x", DOCS.iterator());
+
+ DOCS = new ArrayList<Map<String,Object>>(SOLR_DOCS);
+ Map<String,Object> solrDoc = new HashMap<String,Object>();
+ solrDoc.put("id", "2");
+ solrDoc.put("desc", "SolrDescription2");
+ DOCS.add(solrDoc);
+ addDocumentsToSolr(DOCS);
runFullImport(getDihConfigTagsInnerEntity());
} catch (Exception e) {
LOG.error(e.getMessage(), e);
@@ -211,12 +223,15 @@ public class TestSolrEntityProcessorEndT
MockDataSource.clearCache();
}
- assertQ(req("*:*"), "//result[@numFound='1']");
+ assertQ(req("*:*"), "//result[@numFound='2']");
assertQ(req("id:1"), "//result/doc/str[@name='id'][.='1']",
"//result/doc/str[@name='dbdesc_s'][.='DbDescription']",
"//result/doc/str[@name='dbid_s'][.='1']",
"//result/doc/arr[@name='desc'][.='SolrDescription']");
-
+ assertQ(req("id:2"), "//result/doc/str[@name='id'][.='2']",
+ "//result/doc/str[@name='dbdesc_s'][.='DbDescription2']",
+ "//result/doc/str[@name='dbid_s'][.='2']",
+ "//result/doc/arr[@name='desc'][.='SolrDescription2']");
}
public void testFullImportWrongSolrUrl() {
@@ -267,10 +282,14 @@ public class TestSolrEntityProcessorEndT
}
HttpSolrServer solrServer = new HttpSolrServer(getSourceUrl());
- solrServer.setConnectionTimeout(15000);
- solrServer.setSoTimeout(30000);
- solrServer.add(sidl);
- solrServer.commit(true, true);
+ try {
+ solrServer.setConnectionTimeout(15000);
+ solrServer.setSoTimeout(30000);
+ solrServer.add(sidl);
+ solrServer.commit(true, true);
+ } finally {
+ solrServer.shutdown();
+ }
}
private static class SolrInstance {
@@ -293,7 +312,11 @@ public class TestSolrEntityProcessorEndT
public String getSolrConfigFile() {
return SOURCE_CONF_DIR + "dataimport-solrconfig.xml";
}
-
+
+ public String getSolrXmlFile() {
+ return ROOT_DIR + "solr.xml";
+ }
+
public void setUp() throws Exception {
File home = new File(TEMP_DIR, getClass().getName() + "-"
@@ -306,7 +329,8 @@ public class TestSolrEntityProcessorEndT
homeDir.mkdirs();
dataDir.mkdirs();
confDir.mkdirs();
-
+
+ FileUtils.copyFile(getFile(getSolrXmlFile()), new File(homeDir, "solr.xml"));
File f = new File(confDir, "solrconfig.xml");
FileUtils.copyFile(getFile(getSolrConfigFile()), f);
f = new File(confDir, "schema.xml");
Modified: lucene/dev/branches/lucene4956/solr/contrib/extraction/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/extraction/ivy.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/extraction/ivy.xml (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/extraction/ivy.xml Mon Oct 21 18:58:24 2013
@@ -20,39 +20,39 @@
<info organisation="org.apache.solr" module="extraction"/>
<dependencies>
<!-- Tika JARs -->
- <dependency org="org.apache.tika" name="tika-core" rev="1.4" transitive="false"/>
- <dependency org="org.apache.tika" name="tika-parsers" rev="1.4" transitive="false"/>
+ <dependency org="org.apache.tika" name="tika-core" rev="${/org.apache.tika/tika-core}" transitive="false"/>
+ <dependency org="org.apache.tika" name="tika-parsers" rev="${/org.apache.tika/tika-parsers}" transitive="false"/>
<!-- Tika dependencies - see http://tika.apache.org/1.3/gettingstarted.html#Using_Tika_as_a_Maven_dependency -->
<!-- When upgrading Tika, upgrade dependencies versions and add any new ones
(except slf4j-api, commons-codec, commons-logging, geronimo-stax-api_1.0_spec) -->
- <dependency org="org.gagravarr" name="vorbis-java-tika" rev="0.1" transitive="false"/>
- <dependency org="org.gagravarr" name="vorbis-java-core" rev="0.1" transitive="false"/>
- <dependency org="edu.ucar" name="netcdf" rev="4.2-min" transitive="false"/>
- <dependency org="org.apache.james" name="apache-mime4j-core" rev="0.7.2" transitive="false"/>
- <dependency org="org.apache.james" name="apache-mime4j-dom" rev="0.7.2" transitive="false"/>
- <dependency org="org.apache.commons" name="commons-compress" rev="1.4.1" transitive="false"/>
- <dependency org="org.apache.pdfbox" name="pdfbox" rev="1.8.1" transitive="false"/>
- <dependency org="org.apache.pdfbox" name="fontbox" rev="1.8.1" transitive="false"/>
- <dependency org="org.apache.pdfbox" name="jempbox" rev="1.8.1" transitive="false"/>
- <dependency org="org.bouncycastle" name="bcmail-jdk15" rev="1.45" transitive="false"/>
- <dependency org="org.bouncycastle" name="bcprov-jdk15" rev="1.45" transitive="false"/>
- <dependency org="org.apache.poi" name="poi" rev="3.9" transitive="false"/>
- <dependency org="org.apache.poi" name="poi-scratchpad" rev="3.9" transitive="false"/>
- <dependency org="org.apache.poi" name="poi-ooxml" rev="3.9" transitive="false"/>
- <dependency org="org.apache.poi" name="poi-ooxml-schemas" rev="3.9" transitive="false"/>
- <dependency org="org.apache.xmlbeans" name="xmlbeans" rev="2.3.0" transitive="false"/>
- <dependency org="dom4j" name="dom4j" rev="1.6.1" transitive="false"/>
- <dependency org="org.ccil.cowan.tagsoup" name="tagsoup" rev="1.2.1" transitive="false"/>
- <dependency org="com.googlecode.mp4parser" name="isoparser" rev="1.0-RC-1" transitive="false"/>
- <dependency org="com.drewnoakes" name="metadata-extractor" rev="2.6.2" transitive="false"/>
- <dependency org="de.l3s.boilerpipe" name="boilerpipe" rev="1.1.0" transitive="false"/>
- <dependency org="rome" name="rome" rev="0.9" transitive="false"/>
- <dependency org="jdom" name="jdom" rev="1.0" transitive="false"/>
- <dependency org="com.googlecode.juniversalchardet" name="juniversalchardet" rev="1.0.3" transitive="false"/>
- <dependency org="org.tukaani" name="xz" rev="1.0" transitive="false"/>
+ <dependency org="org.gagravarr" name="vorbis-java-tika" rev="${/org.gagravarr/vorbis-java-tika}" transitive="false"/>
+ <dependency org="org.gagravarr" name="vorbis-java-core" rev="${/org.gagravarr/vorbis-java-core}" transitive="false"/>
+ <dependency org="edu.ucar" name="netcdf" rev="${/edu.ucar/netcdf}" transitive="false"/>
+ <dependency org="org.apache.james" name="apache-mime4j-core" rev="${/org.apache.james/apache-mime4j-core}" transitive="false"/>
+ <dependency org="org.apache.james" name="apache-mime4j-dom" rev="${/org.apache.james/apache-mime4j-dom}" transitive="false"/>
+ <dependency org="org.apache.commons" name="commons-compress" rev="${/org.apache.commons/commons-compress}" transitive="false"/>
+ <dependency org="org.apache.pdfbox" name="pdfbox" rev="${/org.apache.pdfbox/pdfbox}" transitive="false"/>
+ <dependency org="org.apache.pdfbox" name="fontbox" rev="${/org.apache.pdfbox/fontbox}" transitive="false"/>
+ <dependency org="org.apache.pdfbox" name="jempbox" rev="${/org.apache.pdfbox/jempbox}" transitive="false"/>
+ <dependency org="org.bouncycastle" name="bcmail-jdk15" rev="${/org.bouncycastle/bcmail-jdk15}" transitive="false"/>
+ <dependency org="org.bouncycastle" name="bcprov-jdk15" rev="${/org.bouncycastle/bcprov-jdk15}" transitive="false"/>
+ <dependency org="org.apache.poi" name="poi" rev="${/org.apache.poi/poi}" transitive="false"/>
+ <dependency org="org.apache.poi" name="poi-scratchpad" rev="${/org.apache.poi/poi-scratchpad}" transitive="false"/>
+ <dependency org="org.apache.poi" name="poi-ooxml" rev="${/org.apache.poi/poi-ooxml}" transitive="false"/>
+ <dependency org="org.apache.poi" name="poi-ooxml-schemas" rev="${/org.apache.poi/poi-ooxml-schemas}" transitive="false"/>
+ <dependency org="org.apache.xmlbeans" name="xmlbeans" rev="${/org.apache.xmlbeans/xmlbeans}" transitive="false"/>
+ <dependency org="dom4j" name="dom4j" rev="${/dom4j/dom4j}" transitive="false"/>
+ <dependency org="org.ccil.cowan.tagsoup" name="tagsoup" rev="${/org.ccil.cowan.tagsoup/tagsoup}" transitive="false"/>
+ <dependency org="com.googlecode.mp4parser" name="isoparser" rev="${/com.googlecode.mp4parser/isoparser}" transitive="false"/>
+ <dependency org="com.drewnoakes" name="metadata-extractor" rev="${/com.drewnoakes/metadata-extractor}" transitive="false"/>
+ <dependency org="de.l3s.boilerpipe" name="boilerpipe" rev="${/de.l3s.boilerpipe/boilerpipe}" transitive="false"/>
+ <dependency org="rome" name="rome" rev="${/rome/rome}" transitive="false"/>
+ <dependency org="jdom" name="jdom" rev="${/jdom/jdom}" transitive="false"/>
+ <dependency org="com.googlecode.juniversalchardet" name="juniversalchardet" rev="${/com.googlecode.juniversalchardet/juniversalchardet}" transitive="false"/>
+ <dependency org="org.tukaani" name="xz" rev="${/org.tukaani/xz}" transitive="false"/>
<!-- Other ExtracingRequestHandler dependencies -->
- <dependency org="com.ibm.icu" name="icu4j" rev="49.1" transitive="false"/>
- <dependency org="xerces" name="xercesImpl" rev="2.9.1" transitive="false"/>
+ <dependency org="com.ibm.icu" name="icu4j" rev="${/com.ibm.icu/icu4j}" transitive="false"/>
+ <dependency org="xerces" name="xercesImpl" rev="${/xerces/xercesImpl}" transitive="false"/>
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
</ivy-module>
Modified: lucene/dev/branches/lucene4956/solr/contrib/langid/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/langid/ivy.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/langid/ivy.xml (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/langid/ivy.xml Mon Oct 21 18:58:24 2013
@@ -19,8 +19,8 @@
<ivy-module version="2.0">
<info organisation="org.apache.solr" module="langid"/>
<dependencies>
- <dependency org="com.cybozu.labs" name="langdetect" rev="1.1-20120112" transitive="false"/>
- <dependency org="net.arnx" name="jsonic" rev="1.2.7" transitive="false"/>
+ <dependency org="com.cybozu.labs" name="langdetect" rev="${/com.cybozu.labs/langdetect}" transitive="false"/>
+ <dependency org="net.arnx" name="jsonic" rev="${/net.arnx/jsonic}" transitive="false"/>
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
</ivy-module>
Modified: lucene/dev/branches/lucene4956/solr/contrib/uima/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/uima/build.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/uima/build.xml (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/uima/build.xml Mon Oct 21 18:58:24 2013
@@ -26,17 +26,36 @@
<import file="../contrib-build.xml"/>
- <path id="classpath">
+ <path id="uima.lucene.libs">
<pathelement path="${analyzers-uima.jar}"/>
+ </path>
+
+ <path id="classpath">
+ <path refid="uima.lucene.libs"/>
<path refid="solr.base.classpath"/>
</path>
- <target name="module-jars-to-solr" depends="jar-analyzers-uima">
+ <target name="module-jars-to-solr"
+ depends="-module-jars-to-solr-not-for-package,-module-jars-to-solr-package"/>
+ <target name="-module-jars-to-solr-not-for-package" unless="called.from.create-package">
+ <antcall target="jar-analyzers-uima" inheritall="true"/>
+ <property name="analyzers-uima.uptodate" value="true"/>
<mkdir dir="${build.dir}/lucene-libs"/>
<copy todir="${build.dir}/lucene-libs" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
<fileset file="${analyzers-uima.jar}"/>
</copy>
</target>
+ <target name="-module-jars-to-solr-package" if="called.from.create-package">
+ <antcall target="-unpack-lucene-tgz" inheritall="true"/>
+ <pathconvert property="relative.uima.lucene.libs" pathsep=",">
+ <path refid="uima.lucene.libs"/>
+ <globmapper from="${common.build.dir}/*" to="*" handledirsep="true"/>
+ </pathconvert>
+ <mkdir dir="${build.dir}/lucene-libs"/>
+ <copy todir="${build.dir}/lucene-libs" preservelastmodified="true" flatten="true" failonerror="true" overwrite="true">
+ <fileset dir="${lucene.tgz.unpack.dir}/lucene-${version}" includes="${relative.uima.lucene.libs}"/>
+ </copy>
+ </target>
<target name="compile-core" depends="jar-analyzers-uima, solr-contrib-build.compile-core"/>
<target name="dist" depends="module-jars-to-solr, common-solr.dist"/>
Modified: lucene/dev/branches/lucene4956/solr/contrib/uima/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/solr/contrib/uima/ivy.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/solr/contrib/uima/ivy.xml (original)
+++ lucene/dev/branches/lucene4956/solr/contrib/uima/ivy.xml Mon Oct 21 18:58:24 2013
@@ -19,12 +19,12 @@
<ivy-module version="2.0">
<info organisation="org.apache.solr" module="uima"/>
<dependencies>
- <dependency org="commons-digester" name="commons-digester" rev="2.0" transitive="false"/>
- <dependency org="org.apache.uima" name="AlchemyAPIAnnotator" rev="2.3.1" transitive="false"/>
- <dependency org="org.apache.uima" name="OpenCalaisAnnotator" rev="2.3.1" transitive="false"/>
- <dependency org="org.apache.uima" name="Tagger" rev="2.3.1" transitive="false"/>
- <dependency org="org.apache.uima" name="WhitespaceTokenizer" rev="2.3.1" transitive="false"/>
- <dependency org="org.apache.uima" name="uimaj-core" rev="2.3.1" transitive="false"/>
+ <dependency org="commons-digester" name="commons-digester" rev="${/commons-digester/commons-digester}" transitive="false"/>
+ <dependency org="org.apache.uima" name="AlchemyAPIAnnotator" rev="${/org.apache.uima/AlchemyAPIAnnotator}" transitive="false"/>
+ <dependency org="org.apache.uima" name="OpenCalaisAnnotator" rev="${/org.apache.uima/OpenCalaisAnnotator}" transitive="false"/>
+ <dependency org="org.apache.uima" name="Tagger" rev="${/org.apache.uima/Tagger}" transitive="false"/>
+ <dependency org="org.apache.uima" name="WhitespaceTokenizer" rev="${/org.apache.uima/WhitespaceTokenizer}" transitive="false"/>
+ <dependency org="org.apache.uima" name="uimaj-core" rev="${/org.apache.uima/uimaj-core}" transitive="false"/>
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
</ivy-module>