You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by mr...@apache.org on 2007/05/08 18:36:18 UTC

svn commit: r536246 - in /jackrabbit/trunk/jackrabbit-core/src/main: java/org/apache/jackrabbit/core/query/lucene/ javacc/fulltext/

Author: mreutegg
Date: Tue May  8 09:36:16 2007
New Revision: 536246

URL: http://svn.apache.org/viewvc?view=rev&rev=536246
Log:
JCR-901: Support synonym searches

Added:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SynonymProvider.java   (with props)
Modified:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/QueryImpl.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
    jackrabbit/trunk/jackrabbit-core/src/main/javacc/fulltext/QueryParser.jjt

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java?view=diff&rev=536246&r1=536245&r2=536246
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java Tue May  8 09:36:16 2007
@@ -114,6 +114,11 @@
     private PropertyTypeRegistry propRegistry;
 
     /**
+     * The synonym provider or <code>null</code> if none is configured.
+     */
+    private SynonymProvider synonymProvider;
+
+    /**
      * Exceptions thrown during tree translation
      */
     private List exceptions = new ArrayList();
@@ -121,25 +126,30 @@
     /**
      * Creates a new <code>LuceneQueryBuilder</code> instance.
      *
-     * @param root          the root node of the abstract query tree.
-     * @param session       of the user executing this query.
-     * @param sharedItemMgr the shared item state manager of the workspace.
-     * @param nsMappings    namespace resolver for internal prefixes.
-     * @param analyzer      for parsing the query statement of the contains function.
-     * @param propReg       the property type registry.
+     * @param root            the root node of the abstract query tree.
+     * @param session         of the user executing this query.
+     * @param sharedItemMgr   the shared item state manager of the workspace.
+     * @param nsMappings      namespace resolver for internal prefixes.
+     * @param analyzer        for parsing the query statement of the contains
+     *                        function.
+     * @param propReg         the property type registry.
+     * @param synonymProvider the synonym provider or <code>null</code> if none
+     *                        is configured.
      */
     private LuceneQueryBuilder(QueryRootNode root,
                                SessionImpl session,
                                ItemStateManager sharedItemMgr,
                                NamespaceMappings nsMappings,
                                Analyzer analyzer,
-                               PropertyTypeRegistry propReg) {
+                               PropertyTypeRegistry propReg,
+                               SynonymProvider synonymProvider) {
         this.root = root;
         this.session = session;
         this.sharedItemMgr = sharedItemMgr;
         this.nsMappings = nsMappings;
         this.analyzer = analyzer;
         this.propRegistry = propReg;
+        this.synonymProvider = synonymProvider;
     }
 
     /**
@@ -150,8 +160,10 @@
      * @param session       of the user executing the query.
      * @param sharedItemMgr the shared item state manager of the workspace.
      * @param nsMappings    namespace resolver for internal prefixes.
-     * @param analyzer      for parsing the query statement of the contains function.
-     * @param propReg       the property type registry to lookup type information.
+     * @param analyzer      for parsing the query statement of the contains
+     *                      function.
+     * @param propReg       the property type registry to lookup type
+     *                      information.
      * @return the lucene query tree.
      * @throws RepositoryException if an error occurs during the translation.
      */
@@ -162,9 +174,38 @@
                                     Analyzer analyzer,
                                     PropertyTypeRegistry propReg)
             throws RepositoryException {
+        return createQuery(root, session, sharedItemMgr, 
+                nsMappings, analyzer, propReg, null);
+    }
+
+    /**
+     * Creates a lucene {@link org.apache.lucene.search.Query} tree from an
+     * abstract query tree.
+     *
+     * @param root            the root node of the abstract query tree.
+     * @param session         of the user executing the query.
+     * @param sharedItemMgr   the shared item state manager of the workspace.
+     * @param nsMappings      namespace resolver for internal prefixes.
+     * @param analyzer        for parsing the query statement of the contains
+     *                        function.
+     * @param propReg         the property type registry to lookup type
+     *                        information.
+     * @param synonymProvider the synonym provider or <code>null</code> if none
+     *                        is configured.
+     * @return the lucene query tree.
+     * @throws RepositoryException if an error occurs during the translation.
+     */
+    public static Query createQuery(QueryRootNode root,
+                                    SessionImpl session,
+                                    ItemStateManager sharedItemMgr,
+                                    NamespaceMappings nsMappings,
+                                    Analyzer analyzer,
+                                    PropertyTypeRegistry propReg,
+                                    SynonymProvider synonymProvider)
+            throws RepositoryException {
 
         LuceneQueryBuilder builder = new LuceneQueryBuilder(root, session,
-                sharedItemMgr, nsMappings, analyzer, propReg);
+                sharedItemMgr, nsMappings, analyzer, propReg, synonymProvider);
 
         Query q = builder.createLuceneQuery();
         if (builder.exceptions.size() > 0) {
@@ -329,7 +370,8 @@
                 tmp.append(propName.getLocalName());
                 fieldname = tmp.toString();
             }
-            QueryParser parser = new QueryParser(fieldname, analyzer);
+            QueryParser parser = new QueryParser(
+                    fieldname, analyzer, synonymProvider);
             parser.setOperator(QueryParser.DEFAULT_OPERATOR_AND);
             // replace unescaped ' with " and escaped ' with just '
             StringBuffer query = new StringBuffer();

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/QueryImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/QueryImpl.java?view=diff&rev=536246&r1=536245&r2=536246
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/QueryImpl.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/QueryImpl.java Tue May  8 09:36:16 2007
@@ -144,7 +144,7 @@
         // build lucene query
         Query query = LuceneQueryBuilder.createQuery(root, session,
                 index.getContext().getItemStateManager(), index.getNamespaceMappings(),
-                index.getTextAnalyzer(), propReg);
+                index.getTextAnalyzer(), propReg, index.getSynonymProvider());
 
         OrderQueryNode orderNode = root.getOrderNode();
 

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java?view=diff&rev=536246&r1=536245&r2=536246
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java Tue May  8 09:36:16 2007
@@ -276,6 +276,16 @@
     private Class indexingConfigurationClass = IndexingConfigurationImpl.class;
 
     /**
+     * The class that implements {@link SynonymProvider}.
+     */
+    private Class synonymProviderClass;
+
+    /**
+     * The currently set synonym provider.
+     */
+    private SynonymProvider synProvider;
+
+    /**
      * Indicates if this <code>SearchIndex</code> is closed and cannot be used
      * anymore.
      */
@@ -308,6 +318,7 @@
 
         extractor = createTextExtractor();
         indexingConfig = createIndexingConfiguration();
+        synProvider = createSynonymProvider();
 
         File indexDir = new File(path);
 
@@ -574,6 +585,24 @@
     }
 
     /**
+     * @return the synonym provider set for this search index; if none is set,
+     *         that of the parent handler when it is a <code>SearchIndex</code>
+     *         (resolved recursively), otherwise <code>null</code>.
+     */
+    public SynonymProvider getSynonymProvider() {
+        if (synProvider != null) {
+            return synProvider;
+        } else {
+            QueryHandler handler = getContext().getParentHandler();
+            if (handler instanceof SearchIndex) {
+                return ((SearchIndex) handler).getSynonymProvider();
+            } else {
+                return null;
+            }
+        }
+    }
+
+    /**
      * Returns an index reader for this search index. The caller of this method
      * is responsible for closing the index reader when he is finished using
      * it.
@@ -698,6 +727,23 @@
     }
 
     /**
+     * @return a new instance of the configured synonym provider class, or
+     *         <code>null</code> if none is configured or creation fails.
+     */
+    protected SynonymProvider createSynonymProvider() {
+        SynonymProvider sp = null;
+        if (synonymProviderClass != null) {
+            try {
+                sp = (SynonymProvider) synonymProviderClass.newInstance();
+            } catch (Exception e) {
+                log.warn("Exception initializing synonym provider: " +
+                        synonymProviderClass, e);
+            }
+        }
+        return sp;
+    }
+
+    /**
      * Returns the document element of the indexing configuration or
      * <code>null</code> if there is no indexing configuration.
      *
@@ -1345,6 +1391,38 @@
      */
     public String getIndexingConfigurationClass() {
         return indexingConfigurationClass.getName();
+    }
+
+    /**
+     * Sets the class that implements {@link SynonymProvider} by name. A name
+     * that cannot be loaded or is not a SynonymProvider is logged and ignored.
+     *
+     * @param className name of the class that implements {@link
+     *                  SynonymProvider}; default is <code>null</code> (none).
+     */
+    public void setSynonymProviderClass(String className) {
+        try {
+            Class clazz = Class.forName(className);
+            if (SynonymProvider.class.isAssignableFrom(clazz)) {
+                synonymProviderClass = clazz;
+            } else {
+                log.warn("Invalid value for synonymProviderClass, {} " +
+                        "does not implement SynonymProvider interface.",
+                        className);
+            }
+        } catch (ClassNotFoundException e) {
+            log.warn("Invalid value for synonymProviderClass, class {} " +
+                    "not found.", className);
+        }
+    }
+
+    /**
+     * @return the name of the configured synonym provider class, or
+     *         <code>null</code> if none is set.
+     */
+    public String getSynonymProviderClass() {
+        return synonymProviderClass != null ?
+                synonymProviderClass.getName() : null;
     }
 
     //----------------------------< internal >----------------------------------

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SynonymProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SynonymProvider.java?view=auto&rev=536246
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SynonymProvider.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SynonymProvider.java Tue May  8 09:36:16 2007
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+/**
+ * <code>SynonymProvider</code> defines an interface for a component that
+ * returns synonyms for a given term.
+ */
+public interface SynonymProvider {
+
+    /**
+     * Returns an array of terms that are considered synonyms for the given
+     * <code>term</code>.
+     *
+     * @param term a search term.
+     * @return the synonyms for the given <code>term</code>, or an empty array
+     *         (not <code>null</code>) if no synonyms are known.
+     */
+    public String[] getSynonyms(String term);
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SynonymProvider.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/trunk/jackrabbit-core/src/main/javacc/fulltext/QueryParser.jjt
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/javacc/fulltext/QueryParser.jjt?view=diff&rev=536246&r1=536245&r2=536246
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/javacc/fulltext/QueryParser.jjt (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/javacc/fulltext/QueryParser.jjt Tue May  8 09:36:16 2007
@@ -35,6 +35,7 @@
 import org.apache.lucene.search.*;
 
 import org.apache.jackrabbit.core.query.lucene.WildcardQuery;
+import org.apache.jackrabbit.core.query.lucene.SynonymProvider;
 
 /**
  * This class is generated by JavaCC.  The only method that clients should need
@@ -101,6 +102,7 @@
   int phraseSlop = 0;
   float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
   Locale locale = Locale.getDefault();
+  SynonymProvider synonymProvider;
 
   /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
    *  @param query  the query string to be parsed.
@@ -116,12 +118,22 @@
 
   /** Constructs a query parser.
    *  @param f  the default field for query terms.
-   *  @param a   used to find terms in the query text.
+   *  @param a  used to find terms in the query text.
    */
   public QueryParser(String f, Analyzer a) {
+    this(f, a, null);
+  }
+
+  /** Constructs a query parser.
+   *  @param f  the default field for query terms.
+   *  @param a  used to find terms in the query text.
+   *  @param sp the synonym provider, or <code>null</code> if none is used.
+   */
+  public QueryParser(String f, Analyzer a, SynonymProvider sp) {
     this(new FastCharStream(""));
     analyzer = a;
     field = f;
+    synonymProvider = sp;
   }
 
   /** Parses a query string, returning a
@@ -570,6 +582,34 @@
   }
 
   /**
+   * Factory method for generating a synonym query.
+   * Called when the parser parses an input term token that has the synonym
+   * prefix (~term) prepended.
+   *
+   * @param field Name of the field the query will use.
+   * @param termStr Term to build the query for (parser strips the ~ prefix).
+   * @return the plain field query for the term if no synonyms are available,
+   *         else a boolean query of SHOULD clauses for the term and synonyms.
+   * @exception ParseException thrown in an overridden method to disallow
+   */
+  protected Query getSynonymQuery(String field, String termStr) throws ParseException
+  {
+    Vector synonyms = new Vector();
+    synonyms.add(new BooleanClause(getFieldQuery(field, termStr), BooleanClause.Occur.SHOULD));
+    if (synonymProvider != null) {
+      String[] terms = synonymProvider.getSynonyms(termStr);
+      for (int i = 0; i < terms.length; i++) {
+        synonyms.add(new BooleanClause(getFieldQuery(field, terms[i]), BooleanClause.Occur.SHOULD));
+      }
+    }
+    if (synonyms.size() == 1) {
+      return ((BooleanClause) synonyms.get(0)).getQuery();
+    } else {
+      return getBooleanQuery(synonyms);
+    }
+  }
+
+  /**
    * Returns a String where the escape char has been
    * removed, or kept only once if there was a double escape.
    */
@@ -656,6 +696,7 @@
 | <QUOTED:     "\"" (~["\""])+ "\"">
 | <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
 | <FUZZY_SLOP:     "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
+| <SYNTERM: "~" <TERM> >
 | <PREFIXTERM:  <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
 // support for prefix queries enabled!
 | <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
@@ -768,6 +809,7 @@
   boolean prefix = false;
   boolean wildcard = false;
   boolean fuzzy = false;
+  boolean synonym = false;
   boolean rangein = false;
   Query q;
 }
@@ -776,6 +818,7 @@
      (
        term=<TERM>
        | term=<PREFIXTERM> { prefix=true; }
+       | term=<SYNTERM> { synonym=true; }
        | term=<WILDTERM> { wildcard=true; }
        | term=<NUMBER>
      )
@@ -789,6 +832,9 @@
          q = getPrefixQuery(field,
            discardEscapeChar(term.image.substring
           (0, term.image.length()-1)));
+       } else if (synonym) {
+         q = getSynonymQuery(field,
+           discardEscapeChar(term.image.substring(1, term.image.length())));
        } else if (fuzzy) {
        	  float fms = fuzzyMinSim;
        	  try {