You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by mr...@apache.org on 2008/12/05 14:07:00 UTC

svn commit: r723728 - in /jackrabbit/trunk/jackrabbit-core: ./ src/main/java/org/apache/jackrabbit/core/query/lucene/ src/main/java/org/apache/jackrabbit/core/query/lucene/fulltext/ src/main/javacc/

Author: mreutegg
Date: Fri Dec  5 05:06:59 2008
New Revision: 723728

URL: http://svn.apache.org/viewvc?rev=723728&view=rev
Log:
JCR-1898: Replace customized QueryParser.jjt

Added:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java   (with props)
Removed:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/fulltext/
    jackrabbit/trunk/jackrabbit-core/src/main/javacc/
Modified:
    jackrabbit/trunk/jackrabbit-core/pom.xml
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java

Modified: jackrabbit/trunk/jackrabbit-core/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/pom.xml?rev=723728&r1=723727&r2=723728&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/pom.xml (original)
+++ jackrabbit/trunk/jackrabbit-core/pom.xml Fri Dec  5 05:06:59 2008
@@ -65,23 +65,6 @@
         </dependencies>
       </plugin>
       <plugin>
-        <groupId>org.codehaus.mojo</groupId>
-        <artifactId>javacc-maven-plugin</artifactId>
-        <version>2.4.1</version>
-        <executions>
-          <execution>
-            <id>fulltext</id>
-            <configuration>
-              <sourceDirectory>${basedir}/src/main/javacc/fulltext</sourceDirectory>
-              <packageName>org.apache.jackrabbit.core.query.lucene.fulltext</packageName>
-            </configuration>
-            <goals>
-              <goal>jjtree-javacc</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-      <plugin>
         <artifactId>maven-surefire-plugin</artifactId>
         <configuration>
           <includes>

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java?rev=723728&r1=723727&r2=723728&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java Fri Dec  5 05:06:59 2008
@@ -17,7 +17,6 @@
 package org.apache.jackrabbit.core.query.lucene;
 
 import org.apache.jackrabbit.core.query.PropertyTypeRegistry;
-import org.apache.jackrabbit.core.query.lucene.fulltext.QueryParser;
 import org.apache.jackrabbit.core.SessionImpl;
 import org.apache.jackrabbit.core.HierarchyManager;
 import org.apache.jackrabbit.core.NodeImpl;
@@ -66,6 +65,7 @@
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.QueryParser;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -354,39 +354,9 @@
             tmp.append(propName.getLocalName());
             fieldname = tmp.toString();
         }
-        QueryParser parser = new QueryParser(
+        QueryParser parser = new JackrabbitQueryParser(
                 fieldname, analyzer, synonymProvider);
-        parser.setOperator(QueryParser.DEFAULT_OPERATOR_AND);
-        // replace escaped ' with just '
-        StringBuffer query = new StringBuffer();
-        String textsearch = node.getFullTextSearchExpression();
-        // the default lucene query parser recognizes 'AND' and 'NOT' as
-        // keywords.
-        textsearch = textsearch.replaceAll("AND", "and");
-        textsearch = textsearch.replaceAll("NOT", "not");
-        boolean escaped = false;
-        for (int i = 0; i < textsearch.length(); i++) {
-            if (textsearch.charAt(i) == '\\') {
-                if (escaped) {
-                    query.append("\\\\");
-                    escaped = false;
-                } else {
-                    escaped = true;
-                }
-            } else if (textsearch.charAt(i) == '\'') {
-                if (escaped) {
-                    escaped = false;
-                }
-                query.append(textsearch.charAt(i));
-            } else {
-                if (escaped) {
-                    query.append('\\');
-                    escaped = false;
-                }
-                query.append(textsearch.charAt(i));
-            }
-        }
-        return parser.parse(query.toString());
+        return parser.parse(node.getFullTextSearchExpression());
     }
 
     public Object visit(FullTextSearchScoreImpl node, Object data) {

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java?rev=723728&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java Fri Dec  5 05:06:59 2008
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import java.util.Vector;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.BooleanClause;
+
+/**
+ * <code>JackrabbitQueryParser</code> extends the standard lucene query parser
+ * and adds JCR specific customizations.
+ */
+public class JackrabbitQueryParser extends QueryParser {
+
+    /**
+     * The Jackrabbit synonym provider or <code>null</code> if there is none.
+     */
+    private final SynonymProvider synonymProvider;
+
+    /**
+     * Creates a new query parser instance.
+     *
+     * @param fieldName       the field name.
+     * @param analyzer        the analyzer.
+     * @param synonymProvider the synonym provider or <code>null</code> if none
+     *                        is available.
+     */
+    public JackrabbitQueryParser(String fieldName,
+                                 Analyzer analyzer,
+                                 SynonymProvider synonymProvider) {
+        super(fieldName, analyzer);
+        this.synonymProvider = synonymProvider;
+        setAllowLeadingWildcard(true);
+        setDefaultOperator(Operator.AND);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public Query parse(String textsearch) throws ParseException {
+        // replace escaped ' with just '
+        StringBuffer rewritten = new StringBuffer();
+        // the default lucene query parser recognizes 'AND' and 'NOT' as
+        // keywords; lower-case them so they are treated as plain terms.
+        textsearch = textsearch.replaceAll("AND", "and");
+        textsearch = textsearch.replaceAll("NOT", "not");
+        boolean escaped = false;
+        for (int i = 0; i < textsearch.length(); i++) {
+            if (textsearch.charAt(i) == '\\') {
+                if (escaped) {
+                    rewritten.append("\\\\");
+                    escaped = false;
+                } else {
+                    escaped = true;
+                }
+            } else if (textsearch.charAt(i) == '\'') {
+                if (escaped) {
+                    escaped = false;
+                }
+                rewritten.append(textsearch.charAt(i));
+            } else if (textsearch.charAt(i) == '~') {
+                if (i == 0 || Character.isWhitespace(textsearch.charAt(i - 1))) {
+                    // escape leading '~' so getFieldQuery() sees it as synonym prefix
+                    rewritten.append("\\");
+                }
+                rewritten.append('~');
+            } else {
+                if (escaped) {
+                    rewritten.append('\\');
+                    escaped = false;
+                }
+                rewritten.append(textsearch.charAt(i));
+            }
+        }
+        return super.parse(rewritten.toString());
+    }
+
+    /**
+     * Factory method for generating a synonym query.
+     * Called when parser parses an input term token that has the synonym
+     * prefix (~term) prepended.
+     *
+     * @param field Name of the field query will use.
+     * @param termStr Term token to use for building term for the query
+     *
+     * @return Resulting {@link Query} built for the term
+     * @exception ParseException if an overriding method disallows synonym queries
+     */
+    protected Query getSynonymQuery(String field, String termStr)
+            throws ParseException {
+        Vector synonyms = new Vector();
+        synonyms.add(new BooleanClause(getFieldQuery(field, termStr),
+                BooleanClause.Occur.SHOULD));
+        if (synonymProvider != null) {
+            String[] terms = synonymProvider.getSynonyms(termStr);
+            for (int i = 0; i < terms.length; i++) {
+                synonyms.add(new BooleanClause(getFieldQuery(field, terms[i]),
+                        BooleanClause.Occur.SHOULD));
+            }
+        }
+        if (synonyms.size() == 1) {
+            return ((BooleanClause) synonyms.get(0)).getQuery();
+        } else {
+            return getBooleanQuery(synonyms);
+        }
+    }
+
+
+    /**
+     * {@inheritDoc}
+     */
+    protected Query getFieldQuery(String field, String queryText)
+            throws ParseException {
+        if (queryText.startsWith("~")) {
+            // synonym query
+            return getSynonymQuery(field, queryText.substring(1));
+        } else {
+            return super.getFieldQuery(field, queryText);
+        }
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    protected Query getPrefixQuery(String field, String termStr)
+            throws ParseException {
+        return getWildcardQuery(field, termStr + "*");
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    protected Query getWildcardQuery(String field, String termStr)
+            throws ParseException {
+        if (getLowercaseExpandedTerms()) {
+            termStr = termStr.toLowerCase();
+        }
+        return new WildcardQuery(field, null, translateWildcards(termStr));
+    }
+
+    /**
+     * Translates unescaped wildcards '*' and '?' into '%' and '_'.
+     *
+     * @param input the input String.
+     * @return the translated String.
+     */
+    private String translateWildcards(String input) {
+        StringBuffer translated = new StringBuffer(input.length());
+        boolean escaped = false;
+        for (int i = 0; i < input.length(); i++) {
+            if (input.charAt(i) == '\\') {
+                if (escaped) {
+                    translated.append("\\\\");
+                    escaped = false;
+                } else {
+                    escaped = true;
+                }
+            } else if (input.charAt(i) == '*') {
+                if (escaped) {
+                    translated.append('*');
+                    escaped = false;
+                } else {
+                    translated.append('%');
+                }
+            } else if (input.charAt(i) == '?') {
+                if (escaped) {
+                    translated.append('?');
+                    escaped = false;
+                } else {
+                    translated.append('_');
+                }
+            } else if (input.charAt(i) == '%' || input.charAt(i) == '_') {
+                // escape every occurrence of '%' and '_'
+                escaped = false;
+                translated.append('\\').append(input.charAt(i));
+            } else {
+                if (escaped) {
+                    translated.append('\\');
+                    escaped = false;
+                }
+                translated.append(input.charAt(i));
+            }
+        }
+        return translated.toString();
+    }
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java?rev=723728&r1=723727&r2=723728&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java Fri Dec  5 05:06:59 2008
@@ -37,8 +37,6 @@
 import org.apache.jackrabbit.core.SearchManager;
 import org.apache.jackrabbit.core.SessionImpl;
 import org.apache.jackrabbit.core.query.PropertyTypeRegistry;
-import org.apache.jackrabbit.core.query.lucene.fulltext.ParseException;
-import org.apache.jackrabbit.core.query.lucene.fulltext.QueryParser;
 import org.apache.jackrabbit.core.state.ItemStateManager;
 import org.apache.jackrabbit.spi.Name;
 import org.apache.jackrabbit.spi.Path;
@@ -73,6 +71,8 @@
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryParser.ParseException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -379,39 +379,9 @@
                 tmp.append(propName.getLocalName());
                 fieldname = tmp.toString();
             }
-            QueryParser parser = new QueryParser(
+            QueryParser parser = new JackrabbitQueryParser(
                     fieldname, analyzer, synonymProvider);
-            parser.setOperator(QueryParser.DEFAULT_OPERATOR_AND);
-            // replace escaped ' with just '
-            StringBuffer query = new StringBuffer();
-            String textsearch = node.getQuery();
-            // the default lucene query parser recognizes 'AND' and 'NOT' as
-            // keywords.
-            textsearch = textsearch.replaceAll("AND", "and");
-            textsearch = textsearch.replaceAll("NOT", "not");
-            boolean escaped = false;
-            for (int i = 0; i < textsearch.length(); i++) {
-                if (textsearch.charAt(i) == '\\') {
-                    if (escaped) {
-                        query.append("\\\\");
-                        escaped = false;
-                    } else {
-                        escaped = true;
-                    }
-                } else if (textsearch.charAt(i) == '\'') {
-                    if (escaped) {
-                        escaped = false;
-                    }
-                    query.append(textsearch.charAt(i));
-                } else {
-                    if (escaped) {
-                        query.append('\\');
-                        escaped = false;
-                    }
-                    query.append(textsearch.charAt(i));
-                }
-            }
-            Query context = parser.parse(query.toString());
+            Query context = parser.parse(node.getQuery());
             if (relPath != null && (!node.getReferencesProperty() || relPath.getLength() > 1)) {
                 // text search on some child axis
                 Path.Element[] elements = relPath.getElements();