You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by mr...@apache.org on 2007/09/07 12:09:26 UTC
svn commit: r573526 - in /jackrabbit/trunk/jackrabbit-core/src/main:
java/org/apache/jackrabbit/core/query/lucene/
resources/org/apache/jackrabbit/core/query/lucene/
Author: mreutegg
Date: Fri Sep 7 03:09:24 2007
New Revision: 573526
URL: http://svn.apache.org/viewvc?rev=573526&view=rev
Log:
JCR-1079: Extend the IndexingConfiguration to allow configuration of reuseable analyzers
Added:
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitAnalyzer.java (with props)
Modified:
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfiguration.java
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfigurationImpl.java
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
jackrabbit/trunk/jackrabbit-core/src/main/resources/org/apache/jackrabbit/core/query/lucene/indexing-configuration-1.0.dtd
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfiguration.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfiguration.java?rev=573526&r1=573525&r2=573526&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfiguration.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfiguration.java Fri Sep 7 03:09:24 2007
@@ -19,6 +19,7 @@
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.core.query.QueryHandlerContext;
import org.apache.jackrabbit.name.QName;
+import org.apache.lucene.analysis.Analyzer;
import org.w3c.dom.Element;
/**
@@ -39,9 +40,10 @@
*
* @param config the document element of the configuration DOM.
* @param context the context of the query handler.
+ * @param namespaceMappings the namespaceMappings.
* @throws Exception if initialization fails.
*/
- public void init(Element config, QueryHandlerContext context) throws Exception;
+ public void init(Element config, QueryHandlerContext context, NamespaceMappings namespaceMappings) throws Exception;
/**
* Returns the configured indexing aggregate rules or <code>null</code> if
@@ -92,4 +94,19 @@
* @return the boost for the node scope fulltext index field.
*/
float getNodeBoost(NodeState state);
+
+ /**
+ * Returns the analyzer configured for the property with this fieldName
+ * (the string representation, i.e. the JCR-style name, of the property's
+ * <code>QName</code>, prefixed with <code>FieldNames.FULLTEXT_PREFIX</code>),
+ * or <code>null</code> if none is configured or the configured analyzer
+ * cannot be found. If <code>null</code> is returned, the default Analyzer
+ * is used.
+ *
+ * @param fieldName the string representation (JCR-style name) of the property's
+ * <code>QName</code>, prefixed with <code>FieldNames.FULLTEXT_PREFIX</code>
+ * @return the <code>Analyzer</code> to use for this property, or <code>null</code> if none is configured
+ */
+ Analyzer getPropertyAnalyzer(String fieldName);
+
}
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfigurationImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfigurationImpl.java?rev=573526&r1=573525&r2=573526&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfigurationImpl.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/IndexingConfigurationImpl.java Fri Sep 7 03:09:24 2007
@@ -38,7 +38,10 @@
import org.apache.jackrabbit.core.query.QueryHandlerContext;
import org.apache.jackrabbit.core.value.InternalValue;
import org.apache.jackrabbit.util.ISO9075;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.commons.collections.iterators.AbstractIteratorDecorator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import org.w3c.dom.CharacterData;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
@@ -63,6 +66,11 @@
public class IndexingConfigurationImpl implements IndexingConfiguration {
/**
+ * The logger instance for this class
+ */
+ private static final Logger log = LoggerFactory.getLogger(IndexingConfigurationImpl.class);
+
+ /**
* A namespace resolver for parsing QNames in the configuration.
*/
private NamespaceResolver nsResolver;
@@ -93,9 +101,14 @@
private AggregateRule[] aggregateRules;
/**
+ * The configured analyzers for indexing properties.
+ */
+ private Map analyzers = new HashMap();
+
+ /**
* {@inheritDoc}
*/
- public void init(Element config, QueryHandlerContext context) throws Exception {
+ public void init(Element config, QueryHandlerContext context, NamespaceMappings nsMappings) throws Exception {
ntReg = context.getNodeTypeRegistry();
ism = context.getItemStateManager();
NameResolver nameResolver = new ParsingNameResolver(
@@ -126,7 +139,49 @@
} else if (configNode.getNodeName().equals("aggregate")) {
idxAggregates.add(new AggregateRuleImpl(
configNode, nsResolver, ism, hmgr));
+ } else if (configNode.getNodeName().equals("analyzers")) {
+ NodeList childNodes = configNode.getChildNodes();
+ for (int j = 0; j < childNodes.getLength(); j++) {
+ Node analyzerNode = childNodes.item(j);
+ if (analyzerNode.getNodeName().equals("analyzer")) {
+ String analyzerClassName = analyzerNode.getAttributes().getNamedItem("class").getNodeValue();
+ try {
+ Class clazz = Class.forName(analyzerClassName);
+ if(clazz == JackrabbitAnalyzer.class) {
+ log.warn("Not allowed to configure " + JackrabbitAnalyzer.class.getName() + " for a property. " +
+ "Using default analyzer for that property.");
+ }
+ else if(Analyzer.class.isAssignableFrom(clazz)){
+ Analyzer analyzer = (Analyzer)clazz.newInstance();
+ NodeList propertyChildNodes = analyzerNode.getChildNodes();
+ for (int k = 0; k < propertyChildNodes.getLength(); k++) {
+ Node propertyNode = propertyChildNodes.item(k);
+ if (propertyNode.getNodeName().equals("property")) {
+ // get property name
+ QName propName = NameFormat.parse(getTextContent(propertyNode), nsResolver);
+ String fieldName = NameFormat.format(propName, nsMappings);
+ // set analyzer for the fulltext property fieldname
+ int idx = fieldName.indexOf(':');
+ fieldName = fieldName.substring(0, idx + 1)
+ + FieldNames.FULLTEXT_PREFIX + fieldName.substring(idx + 1);;
+ Object prevAnalyzer = analyzers.put(fieldName, analyzer);
+ if(prevAnalyzer!=null){
+ log.warn("Property " + propName.getLocalName() + " has been configured for multiple analyzers. " +
+ " Last configured analyzer is used");
+ }
+ }
+ }
+ } else {
+ log.warn("org.apache.lucene.analysis.Analyzer is not a superclass of "
+ + analyzerClassName +". Ignoring this configure analyzer" );
+ }
+ } catch (ClassNotFoundException e) {
+ log.warn("Analyzer class not found: " + analyzerClassName, e);
+ }
+ }
+ }
}
+
}
aggregateRules = (AggregateRule[]) idxAggregates.toArray(
new AggregateRule[idxAggregates.size()]);
@@ -211,6 +266,25 @@
return true;
}
+
+ /**
+ * Returns the analyzer configured for the property with this fieldName
+ * (the string representation, i.e. the JCR-style name, of the property's
+ * <code>QName</code>, prefixed with <code>FieldNames.FULLTEXT_PREFIX</code>),
+ * or <code>null</code> if none is configured or the configured analyzer
+ * cannot be found. If <code>null</code> is returned, the default Analyzer
+ * is used.
+ *
+ * @param fieldName the string representation (JCR-style name) of the property's
+ * <code>QName</code>, prefixed with <code>FieldNames.FULLTEXT_PREFIX</code>
+ * @return the <code>Analyzer</code> to use for this property, or <code>null</code> if none is configured
+ */
+ public Analyzer getPropertyAnalyzer(String fieldName) {
+ if(analyzers.containsKey(fieldName)){
+ return (Analyzer)analyzers.get(fieldName);
+ }
+ return null;
+ }
//---------------------------------< internal >-----------------------------
/**
@@ -732,4 +806,4 @@
return false;
}
}
-}
\ No newline at end of file
+}
Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitAnalyzer.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitAnalyzer.java?rev=573526&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitAnalyzer.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitAnalyzer.java Fri Sep 7 03:09:24 2007
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+
+import java.io.Reader;
+
+/**
+ * This is the global jackrabbit lucene analyzer. By default, all
+ * properties are indexed with the <code>StandardAnalyzer(new String[]{})</code>,
+ * unless a global analyzer is defined in the &lt;SearchIndex&gt; configuration.
+ *
+ * In the indexing configuration, properties can be configured to be
+ * indexed with a specific analyzer. If configured, this analyzer is used to
+ * index the text of the property and to parse search text for this property.
+ */
+
+public class JackrabbitAnalyzer extends Analyzer {
+
+ /**
+ * The default Jackrabbit analyzer if none is configured in the <code>&lt;SearchIndex&gt;</code>
+ * configuration.
+ */
+ private Analyzer defaultAnalyzer = new StandardAnalyzer(new String[]{});
+
+ /**
+ * The indexing configuration.
+ */
+ private IndexingConfiguration indexingConfig;
+
+ /**
+ * @param indexingConfig the indexing configuration to consult for property analyzers; may be <code>null</code>
+ */
+ protected void setIndexingConfig(IndexingConfiguration indexingConfig) {
+ this.indexingConfig = indexingConfig;
+ }
+
+ /**
+ * @param analyzer the default jackrabbit analyzer; tokenStream() dereferences it without a null check
+ */
+ protected void setDefaultAnalyzer(Analyzer analyzer){
+ defaultAnalyzer = analyzer;
+ }
+
+ /**
+ * Creates a TokenStream which tokenizes all the text in the provided
+ * Reader. If the indexing configuration returns an analyzer for the fieldName
+ * (property), it is used for tokenization; otherwise the default analyzer is used.
+ */
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ if(indexingConfig!=null){
+ Analyzer propertyAnalyzer = indexingConfig.getPropertyAnalyzer(fieldName);
+ if(propertyAnalyzer!=null){
+ return propertyAnalyzer.tokenStream(fieldName, reader);
+ }
+ }
+ return defaultAnalyzer.tokenStream(fieldName, reader);
+ }
+
+}
Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitAnalyzer.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java?rev=573526&r1=573525&r2=573526&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java Fri Sep 7 03:09:24 2007
@@ -39,7 +39,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
@@ -152,7 +151,7 @@
/**
* The analyzer we use for indexing.
*/
- private Analyzer analyzer;
+ private JackrabbitAnalyzer analyzer;
/**
* List of text extractor and text filter class names. The configured
@@ -330,7 +329,7 @@
* Default constructor.
*/
public SearchIndex() {
- this.analyzer = new StandardAnalyzer(new String[]{});
+ this.analyzer = new JackrabbitAnalyzer();
}
/**
@@ -352,7 +351,6 @@
}
extractor = createTextExtractor();
- indexingConfig = createIndexingConfiguration();
synProvider = createSynonymProvider();
File indexDir = new File(path);
@@ -375,7 +373,10 @@
context.getNamespaceRegistry());
}
}
-
+
+ indexingConfig = createIndexingConfiguration(nsMappings);
+ analyzer.setIndexingConfig(indexingConfig);
+
index = new MultiIndex(indexDir, this, excludedIDs, nsMappings);
if (index.numDocs() == 0) {
index.createInitialIndex(
@@ -793,10 +794,11 @@
}
/**
+ * @param namespaceMappings The namespace mappings
* @return the fulltext indexing configuration or <code>null</code> if there
* is no configuration.
*/
- protected IndexingConfiguration createIndexingConfiguration() {
+ protected IndexingConfiguration createIndexingConfiguration(NamespaceMappings namespaceMappings) {
Element docElement = getIndexingConfigurationDOM();
if (docElement == null) {
return null;
@@ -804,7 +806,7 @@
try {
IndexingConfiguration idxCfg = (IndexingConfiguration)
indexingConfigurationClass.newInstance();
- idxCfg.init(docElement, getContext());
+ idxCfg.init(docElement, getContext(), namespaceMappings);
return idxCfg;
} catch (Exception e) {
log.warn("Exception initializing indexing configuration from: " +
@@ -1121,7 +1123,7 @@
public void setAnalyzer(String analyzerClassName) {
try {
Class analyzerClass = Class.forName(analyzerClassName);
- analyzer = (Analyzer) analyzerClass.newInstance();
+ analyzer.setDefaultAnalyzer((Analyzer) analyzerClass.newInstance());
} catch (Exception e) {
log.warn("Invalid Analyzer class: " + analyzerClassName, e);
}
Modified: jackrabbit/trunk/jackrabbit-core/src/main/resources/org/apache/jackrabbit/core/query/lucene/indexing-configuration-1.0.dtd
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/resources/org/apache/jackrabbit/core/query/lucene/indexing-configuration-1.0.dtd?rev=573526&r1=573525&r2=573526&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/resources/org/apache/jackrabbit/core/query/lucene/indexing-configuration-1.0.dtd (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/resources/org/apache/jackrabbit/core/query/lucene/indexing-configuration-1.0.dtd Fri Sep 7 03:09:24 2007
@@ -63,4 +63,15 @@
-->
<!ELEMENT property (#PCDATA)>
<!ATTLIST property boost CDATA "1.0"
- nodeScopeIndex CDATA "true">
\ No newline at end of file
+ nodeScopeIndex CDATA "true">
+
+<!--
+ An analyzer element with property elements in it defines which analyzer is to
+ be used for indexing and parsing the full text of those properties. If the
+ analyzer class cannot be found, the default analyzer is used. The node scope is
+ always indexed with the default analyzer, so in some rare cases node scope
+ searches might return different results than property-scoped searches.
+-->
+<!ELEMENT analyzers (analyzer*)>
+<!ELEMENT analyzer (property*)>
+<!ATTLIST analyzer class CDATA #REQUIRED>