You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by mr...@apache.org on 2007/05/08 18:36:18 UTC
svn commit: r536246 - in /jackrabbit/trunk/jackrabbit-core/src/main:
java/org/apache/jackrabbit/core/query/lucene/ javacc/fulltext/
Author: mreutegg
Date: Tue May 8 09:36:16 2007
New Revision: 536246
URL: http://svn.apache.org/viewvc?view=rev&rev=536246
Log:
JCR-901: Support synonym searches
Added:
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SynonymProvider.java (with props)
Modified:
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/QueryImpl.java
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
jackrabbit/trunk/jackrabbit-core/src/main/javacc/fulltext/QueryParser.jjt
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java?view=diff&rev=536246&r1=536245&r2=536246
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java Tue May 8 09:36:16 2007
@@ -114,6 +114,11 @@
private PropertyTypeRegistry propRegistry;
/**
+ * The synonym provider or <code>null</code> if none is configured.
+ */
+ private SynonymProvider synonymProvider;
+
+ /**
* Exceptions thrown during tree translation
*/
private List exceptions = new ArrayList();
@@ -121,25 +126,30 @@
/**
* Creates a new <code>LuceneQueryBuilder</code> instance.
*
- * @param root the root node of the abstract query tree.
- * @param session of the user executing this query.
- * @param sharedItemMgr the shared item state manager of the workspace.
- * @param nsMappings namespace resolver for internal prefixes.
- * @param analyzer for parsing the query statement of the contains function.
- * @param propReg the property type registry.
+ * @param root the root node of the abstract query tree.
+ * @param session of the user executing this query.
+ * @param sharedItemMgr the shared item state manager of the workspace.
+ * @param nsMappings namespace resolver for internal prefixes.
+ * @param analyzer for parsing the query statement of the contains
+ * function.
+ * @param propReg the property type registry.
+ * @param synonymProvider the synonym provider or <code>null</code> if none
+ * is configured.
*/
private LuceneQueryBuilder(QueryRootNode root,
SessionImpl session,
ItemStateManager sharedItemMgr,
NamespaceMappings nsMappings,
Analyzer analyzer,
- PropertyTypeRegistry propReg) {
+ PropertyTypeRegistry propReg,
+ SynonymProvider synonymProvider) {
this.root = root;
this.session = session;
this.sharedItemMgr = sharedItemMgr;
this.nsMappings = nsMappings;
this.analyzer = analyzer;
this.propRegistry = propReg;
+ this.synonymProvider = synonymProvider;
}
/**
@@ -150,8 +160,10 @@
* @param session of the user executing the query.
* @param sharedItemMgr the shared item state manager of the workspace.
* @param nsMappings namespace resolver for internal prefixes.
- * @param analyzer for parsing the query statement of the contains function.
- * @param propReg the property type registry to lookup type information.
+ * @param analyzer for parsing the query statement of the contains
+ * function.
+ * @param propReg the property type registry to lookup type
+ * information.
* @return the lucene query tree.
* @throws RepositoryException if an error occurs during the translation.
*/
@@ -162,9 +174,38 @@
Analyzer analyzer,
PropertyTypeRegistry propReg)
throws RepositoryException {
+ return createQuery(root, session, sharedItemMgr,
+ nsMappings, analyzer, propReg, null);
+ }
+
+ /**
+ * Creates a lucene {@link org.apache.lucene.search.Query} tree from an
+ * abstract query tree.
+ *
+ * @param root the root node of the abstract query tree.
+ * @param session of the user executing the query.
+ * @param sharedItemMgr the shared item state manager of the workspace.
+ * @param nsMappings namespace resolver for internal prefixes.
+ * @param analyzer for parsing the query statement of the contains
+ * function.
+ * @param propReg the property type registry to lookup type
+ * information.
+ * @param synonymProvider the synonym provider or <code>null</code> if none
+ * is configured.
+ * @return the lucene query tree.
+ * @throws RepositoryException if an error occurs during the translation.
+ */
+ public static Query createQuery(QueryRootNode root,
+ SessionImpl session,
+ ItemStateManager sharedItemMgr,
+ NamespaceMappings nsMappings,
+ Analyzer analyzer,
+ PropertyTypeRegistry propReg,
+ SynonymProvider synonymProvider)
+ throws RepositoryException {
LuceneQueryBuilder builder = new LuceneQueryBuilder(root, session,
- sharedItemMgr, nsMappings, analyzer, propReg);
+ sharedItemMgr, nsMappings, analyzer, propReg, synonymProvider);
Query q = builder.createLuceneQuery();
if (builder.exceptions.size() > 0) {
@@ -329,7 +370,8 @@
tmp.append(propName.getLocalName());
fieldname = tmp.toString();
}
- QueryParser parser = new QueryParser(fieldname, analyzer);
+ QueryParser parser = new QueryParser(
+ fieldname, analyzer, synonymProvider);
parser.setOperator(QueryParser.DEFAULT_OPERATOR_AND);
// replace unescaped ' with " and escaped ' with just '
StringBuffer query = new StringBuffer();
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/QueryImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/QueryImpl.java?view=diff&rev=536246&r1=536245&r2=536246
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/QueryImpl.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/QueryImpl.java Tue May 8 09:36:16 2007
@@ -144,7 +144,7 @@
// build lucene query
Query query = LuceneQueryBuilder.createQuery(root, session,
index.getContext().getItemStateManager(), index.getNamespaceMappings(),
- index.getTextAnalyzer(), propReg);
+ index.getTextAnalyzer(), propReg, index.getSynonymProvider());
OrderQueryNode orderNode = root.getOrderNode();
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java?view=diff&rev=536246&r1=536245&r2=536246
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java Tue May 8 09:36:16 2007
@@ -276,6 +276,16 @@
private Class indexingConfigurationClass = IndexingConfigurationImpl.class;
/**
+ * The class that implements {@link SynonymProvider}.
+ */
+ private Class synonymProviderClass;
+
+ /**
+ * The currently set synonym provider.
+ */
+ private SynonymProvider synProvider;
+
+ /**
* Indicates if this <code>SearchIndex</code> is closed and cannot be used
* anymore.
*/
@@ -308,6 +318,7 @@
extractor = createTextExtractor();
indexingConfig = createIndexingConfiguration();
+ synProvider = createSynonymProvider();
File indexDir = new File(path);
@@ -574,6 +585,24 @@
}
/**
+ * @return the synonym provider of this search index. If none is set for
+ * this search index the synonym provider of the parent handler is
+ * returned if there is any.
+ */
+ public SynonymProvider getSynonymProvider() {
+ if (synProvider != null) {
+ return synProvider;
+ } else {
+ QueryHandler handler = getContext().getParentHandler();
+ if (handler instanceof SearchIndex) {
+ return ((SearchIndex) handler).getSynonymProvider();
+ } else {
+ return null;
+ }
+ }
+ }
+
+ /**
* Returns an index reader for this search index. The caller of this method
* is responsible for closing the index reader when he is finished using
* it.
@@ -698,6 +727,23 @@
}
/**
+ * @return the configured synonym provider or <code>null</code> if none is
+ * configured or an error occurs.
+ */
+ protected SynonymProvider createSynonymProvider() {
+ SynonymProvider sp = null;
+ if (synonymProviderClass != null) {
+ try {
+ sp = (SynonymProvider) synonymProviderClass.newInstance();
+ } catch (Exception e) {
+ log.warn("Exception initializing synonym provider: " +
+ synonymProviderClass, e);
+ }
+ }
+ return sp;
+ }
+
+ /**
* Returns the document element of the indexing configuration or
* <code>null</code> if there is no indexing configuration.
*
@@ -1345,6 +1391,38 @@
*/
public String getIndexingConfigurationClass() {
return indexingConfigurationClass.getName();
+ }
+
+ /**
+ * Sets the name of the class that implements {@link SynonymProvider}. The
+ * default value is <code>null</code> (none set).
+ *
+ * @param className name of the class that implements {@link
+ * SynonymProvider}.
+ */
+ public void setSynonymProviderClass(String className) {
+ try {
+ Class clazz = Class.forName(className);
+ if (SynonymProvider.class.isAssignableFrom(clazz)) {
+ synonymProviderClass = clazz;
+ } else {
+ log.warn("Invalid value for synonymProviderClass, {} " +
+ "does not implement SynonymProvider interface.",
+ className);
+ }
+ } catch (ClassNotFoundException e) {
+ log.warn("Invalid value for synonymProviderClass, class {} " +
+ "not found.", className);
+ }
+ }
+
+ /**
+ * @return the class name of the synonym provider implementation or
+ * <code>null</code> if none is set.
+ */
+ public String getSynonymProviderClass() {
+ return synonymProviderClass != null ?
+ synonymProviderClass.getName() : null;
}
//----------------------------< internal >----------------------------------
Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SynonymProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SynonymProvider.java?view=auto&rev=536246
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SynonymProvider.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SynonymProvider.java Tue May 8 09:36:16 2007
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+/**
+ * <code>SynonymProvider</code> defines an interface for a component that
+ * returns synonyms for a given term.
+ */
+public interface SynonymProvider {
+
+ /**
+ * Returns an array of terms that are considered synonyms for the given
+ * <code>term</code>.
+ *
+ * @param term a search term.
+ * @return an array of synonyms for the given <code>term</code> or an empty
+ * array if no synonyms are known.
+ */
+ public String[] getSynonyms(String term);
+}
Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SynonymProvider.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: jackrabbit/trunk/jackrabbit-core/src/main/javacc/fulltext/QueryParser.jjt
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/javacc/fulltext/QueryParser.jjt?view=diff&rev=536246&r1=536245&r2=536246
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/javacc/fulltext/QueryParser.jjt (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/javacc/fulltext/QueryParser.jjt Tue May 8 09:36:16 2007
@@ -35,6 +35,7 @@
import org.apache.lucene.search.*;
import org.apache.jackrabbit.core.query.lucene.WildcardQuery;
+import org.apache.jackrabbit.core.query.lucene.SynonymProvider;
/**
* This class is generated by JavaCC. The only method that clients should need
@@ -101,6 +102,7 @@
int phraseSlop = 0;
float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
Locale locale = Locale.getDefault();
+ SynonymProvider synonymProvider;
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
* @param query the query string to be parsed.
@@ -116,12 +118,22 @@
/** Constructs a query parser.
* @param f the default field for query terms.
- * @param a used to find terms in the query text.
+ * @param a used to find terms in the query text.
*/
public QueryParser(String f, Analyzer a) {
+ this(f, a, null);
+ }
+
+ /** Constructs a query parser.
+ * @param f the default field for query terms.
+ * @param a used to find terms in the query text.
+ * @param sp the synonym provider
+ */
+ public QueryParser(String f, Analyzer a, SynonymProvider sp) {
this(new FastCharStream(""));
analyzer = a;
field = f;
+ synonymProvider = sp;
}
/** Parses a query string, returning a
@@ -570,6 +582,34 @@
}
/**
+ * Factory method for generating a synonym query.
+ * Called when parser parses an input term token that has the synonym
+ * prefix (~term) prepended.
+ *
+ * @param field Name of the field query will use.
+ * @param termStr Term token to use for building term for the query
+ *
+ * @return Resulting {@link Query} built for the term
+ * @exception ParseException may be thrown by an overriding method to disallow synonym queries
+ */
+ protected Query getSynonymQuery(String field, String termStr) throws ParseException
+ {
+ Vector synonyms = new Vector();
+ synonyms.add(new BooleanClause(getFieldQuery(field, termStr), BooleanClause.Occur.SHOULD));
+ if (synonymProvider != null) {
+ String[] terms = synonymProvider.getSynonyms(termStr);
+ for (int i = 0; i < terms.length; i++) {
+ synonyms.add(new BooleanClause(getFieldQuery(field, terms[i]), BooleanClause.Occur.SHOULD));
+ }
+ }
+ if (synonyms.size() == 1) {
+ return ((BooleanClause) synonyms.get(0)).getQuery();
+ } else {
+ return getBooleanQuery(synonyms);
+ }
+ }
+
+ /**
* Returns a String where the escape char has been
* removed, or kept only once if there was a double escape.
*/
@@ -656,6 +696,7 @@
| <QUOTED: "\"" (~["\""])+ "\"">
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
+| <SYNTERM: "~" <TERM> >
| <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
// support for prefix queries enabled!
| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
@@ -768,6 +809,7 @@
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
+ boolean synonym = false;
boolean rangein = false;
Query q;
}
@@ -776,6 +818,7 @@
(
term=<TERM>
| term=<PREFIXTERM> { prefix=true; }
+ | term=<SYNTERM> { synonym=true; }
| term=<WILDTERM> { wildcard=true; }
| term=<NUMBER>
)
@@ -789,6 +832,9 @@
q = getPrefixQuery(field,
discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
+ } else if (synonym) {
+ q = getSynonymQuery(field,
+ discardEscapeChar(term.image.substring(1, term.image.length())));
} else if (fuzzy) {
float fms = fuzzyMinSim;
try {