You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by mr...@apache.org on 2008/12/05 14:07:00 UTC
svn commit: r723728 - in /jackrabbit/trunk/jackrabbit-core: ./
src/main/java/org/apache/jackrabbit/core/query/lucene/
src/main/java/org/apache/jackrabbit/core/query/lucene/fulltext/
src/main/javacc/
Author: mreutegg
Date: Fri Dec 5 05:06:59 2008
New Revision: 723728
URL: http://svn.apache.org/viewvc?rev=723728&view=rev
Log:
JCR-1898: Replace customized QueryParser.jjt
Added:
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java (with props)
Removed:
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/fulltext/
jackrabbit/trunk/jackrabbit-core/src/main/javacc/
Modified:
jackrabbit/trunk/jackrabbit-core/pom.xml
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java
Modified: jackrabbit/trunk/jackrabbit-core/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/pom.xml?rev=723728&r1=723727&r2=723728&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/pom.xml (original)
+++ jackrabbit/trunk/jackrabbit-core/pom.xml Fri Dec 5 05:06:59 2008
@@ -65,23 +65,6 @@
</dependencies>
</plugin>
<plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>javacc-maven-plugin</artifactId>
- <version>2.4.1</version>
- <executions>
- <execution>
- <id>fulltext</id>
- <configuration>
- <sourceDirectory>${basedir}/src/main/javacc/fulltext</sourceDirectory>
- <packageName>org.apache.jackrabbit.core.query.lucene.fulltext</packageName>
- </configuration>
- <goals>
- <goal>jjtree-javacc</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- <plugin>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<includes>
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java?rev=723728&r1=723727&r2=723728&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JQOM2LuceneQueryBuilder.java Fri Dec 5 05:06:59 2008
@@ -17,7 +17,6 @@
package org.apache.jackrabbit.core.query.lucene;
import org.apache.jackrabbit.core.query.PropertyTypeRegistry;
-import org.apache.jackrabbit.core.query.lucene.fulltext.QueryParser;
import org.apache.jackrabbit.core.SessionImpl;
import org.apache.jackrabbit.core.HierarchyManager;
import org.apache.jackrabbit.core.NodeImpl;
@@ -66,6 +65,7 @@
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.QueryParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -354,39 +354,9 @@
tmp.append(propName.getLocalName());
fieldname = tmp.toString();
}
- QueryParser parser = new QueryParser(
+ QueryParser parser = new JackrabbitQueryParser(
fieldname, analyzer, synonymProvider);
- parser.setOperator(QueryParser.DEFAULT_OPERATOR_AND);
- // replace escaped ' with just '
- StringBuffer query = new StringBuffer();
- String textsearch = node.getFullTextSearchExpression();
- // the default lucene query parser recognizes 'AND' and 'NOT' as
- // keywords.
- textsearch = textsearch.replaceAll("AND", "and");
- textsearch = textsearch.replaceAll("NOT", "not");
- boolean escaped = false;
- for (int i = 0; i < textsearch.length(); i++) {
- if (textsearch.charAt(i) == '\\') {
- if (escaped) {
- query.append("\\\\");
- escaped = false;
- } else {
- escaped = true;
- }
- } else if (textsearch.charAt(i) == '\'') {
- if (escaped) {
- escaped = false;
- }
- query.append(textsearch.charAt(i));
- } else {
- if (escaped) {
- query.append('\\');
- escaped = false;
- }
- query.append(textsearch.charAt(i));
- }
- }
- return parser.parse(query.toString());
+ return parser.parse(node.getFullTextSearchExpression());
}
public Object visit(FullTextSearchScoreImpl node, Object data) {
Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java?rev=723728&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java Fri Dec 5 05:06:59 2008
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import java.util.Vector;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.BooleanClause;
+
+/**
+ * <code>JackrabbitQueryParser</code> extends the standard lucene query parser
+ * and adds JCR specific customizations.
+ */
+public class JackrabbitQueryParser extends QueryParser {
+
+    /**
+     * The Jackrabbit synonym provider or <code>null</code> if there is none.
+     */
+    private final SynonymProvider synonymProvider;
+
+    /**
+     * Creates a new query parser instance. Leading wildcards are allowed and
+     * the default operator between terms is AND.
+     *
+     * @param fieldName       the field name.
+     * @param analyzer        the analyzer.
+     * @param synonymProvider the synonym provider or <code>null</code> if none
+     *                        is available.
+     */
+    public JackrabbitQueryParser(String fieldName,
+                                 Analyzer analyzer,
+                                 SynonymProvider synonymProvider) {
+        super(fieldName, analyzer);
+        this.synonymProvider = synonymProvider;
+        setAllowLeadingWildcard(true);
+        setDefaultOperator(Operator.AND);
+    }
+
+    /**
+     * Rewrites the JCR full text search expression into the syntax expected
+     * by the lucene query parser and then delegates to the super class.
+     * <p/>
+     * The following rewrites are applied:
+     * <ul>
+     * <li>'AND' and 'NOT' are lower-cased, so lucene does not treat them as
+     * operator keywords.</li>
+     * <li>An escaped single quote (<code>\'</code>) is replaced with a plain
+     * single quote.</li>
+     * <li>A tilde at the beginning of the expression or after whitespace is
+     * escaped, see {@link #getFieldQuery(String, String)}.</li>
+     * <li>All other escape sequences are passed on unchanged. A single
+     * backslash at the very end of the expression is dropped.</li>
+     * </ul>
+     *
+     * @param textsearch the JCR full text search expression.
+     * @return the parsed query.
+     * @throws ParseException if the rewritten expression cannot be parsed.
+     */
+    public Query parse(String textsearch) throws ParseException {
+        StringBuffer rewritten = new StringBuffer();
+        // the default lucene query parser recognizes 'AND' and 'NOT' as
+        // keywords.
+        textsearch = textsearch.replaceAll("AND", "and");
+        textsearch = textsearch.replaceAll("NOT", "not");
+        // true while a backslash has been read, but not yet written
+        boolean escaped = false;
+        for (int i = 0; i < textsearch.length(); i++) {
+            char c = textsearch.charAt(i);
+            if (c == '\\') {
+                if (escaped) {
+                    // escaped backslash
+                    rewritten.append("\\\\");
+                    escaped = false;
+                } else {
+                    escaped = true;
+                }
+            } else if (c == '\'') {
+                // replace escaped ' with just '
+                escaped = false;
+                rewritten.append(c);
+            } else if (c == '~') {
+                if (escaped) {
+                    // explicitly escaped tilde: write the pending escape in
+                    // front of the tilde instead of carrying it over to the
+                    // next character
+                    rewritten.append('\\');
+                    escaped = false;
+                } else if (i == 0
+                        || Character.isWhitespace(textsearch.charAt(i - 1))) {
+                    // escape a leading tilde so it stays part of the term
+                    // and getFieldQuery() can detect the synonym prefix
+                    rewritten.append('\\');
+                }
+                rewritten.append('~');
+            } else {
+                if (escaped) {
+                    rewritten.append('\\');
+                    escaped = false;
+                }
+                rewritten.append(c);
+            }
+        }
+        return super.parse(rewritten.toString());
+    }
+
+    /**
+     * Factory method for generating a synonym query.
+     * Called when parser parses an input term token that has the synonym
+     * prefix (~term) prepended.
+     * <p/>
+     * The returned query matches the term itself or any of the synonyms
+     * returned by the synonym provider. Terms for which no query can be
+     * built are ignored.
+     *
+     * @param field   Name of the field query will use.
+     * @param termStr Term token to use for building term for the query
+     *
+     * @return Resulting {@link Query} built for the term or <code>null</code>
+     *         if neither the term nor any of its synonyms resulted in a query.
+     * @throws ParseException if the query for the term or for one of its
+     *                        synonyms cannot be created.
+     */
+    protected Query getSynonymQuery(String field, String termStr)
+            throws ParseException {
+        Vector synonyms = new Vector();
+        addOptionalClause(synonyms, getFieldQuery(field, termStr));
+        if (synonymProvider != null) {
+            String[] terms = synonymProvider.getSynonyms(termStr);
+            for (int i = 0; i < terms.length; i++) {
+                addOptionalClause(synonyms, getFieldQuery(field, terms[i]));
+            }
+        }
+        if (synonyms.isEmpty()) {
+            return null;
+        } else if (synonyms.size() == 1) {
+            // no need for a boolean query with just one clause
+            return ((BooleanClause) synonyms.get(0)).getQuery();
+        } else {
+            return getBooleanQuery(synonyms);
+        }
+    }
+
+    /**
+     * Adds <code>query</code> as an optional (SHOULD) clause to
+     * <code>clauses</code>. A <code>null</code> query is skipped. The lucene
+     * parser may return <code>null</code> for a term, e.g. when the analyzer
+     * removes all of its tokens.
+     *
+     * @param clauses the list of {@link BooleanClause}s to add to.
+     * @param query   the query to add or <code>null</code>.
+     */
+    private void addOptionalClause(Vector clauses, Query query) {
+        if (query != null) {
+            clauses.add(new BooleanClause(query, BooleanClause.Occur.SHOULD));
+        }
+    }
+
+    /**
+     * Creates a synonym query if <code>queryText</code> starts with the
+     * synonym prefix '~', otherwise delegates to the super class.
+     *
+     * @param field     the name of the field.
+     * @param queryText the text of the term.
+     * @return the query for the term.
+     * @throws ParseException if the query cannot be created.
+     */
+    protected Query getFieldQuery(String field, String queryText)
+            throws ParseException {
+        if (queryText.startsWith("~")) {
+            // synonym query
+            return getSynonymQuery(field, queryText.substring(1));
+        } else {
+            return super.getFieldQuery(field, queryText);
+        }
+    }
+
+    /**
+     * Handles a prefix query as a wildcard query with a trailing '*'.
+     *
+     * @param field   the name of the field.
+     * @param termStr the prefix.
+     * @return the wildcard query for the prefix.
+     * @throws ParseException if the query cannot be created.
+     */
+    protected Query getPrefixQuery(String field, String termStr)
+            throws ParseException {
+        return getWildcardQuery(field, termStr + "*");
+    }
+
+    /**
+     * Creates a Jackrabbit {@link WildcardQuery} for the given term. The
+     * wildcards '*' and '?' are translated into '%' and '_'.
+     *
+     * @param field   the name of the field.
+     * @param termStr the term including wildcards.
+     * @return the wildcard query.
+     * @throws ParseException if the query cannot be created.
+     */
+    protected Query getWildcardQuery(String field, String termStr)
+            throws ParseException {
+        if (getLowercaseExpandedTerms()) {
+            // use a fixed locale, the result must not depend on the default
+            // locale of the JVM (e.g. 'I' -> dotless 'i' with a turkish locale)
+            termStr = termStr.toLowerCase(java.util.Locale.ENGLISH);
+        }
+        return new WildcardQuery(field, null, translateWildcards(termStr));
+    }
+
+    /**
+     * Translates unescaped wildcards '*' and '?' into '%' and '_'.
+     * Escaped wildcards are written as literal '*' and '?', every '%' and '_'
+     * of the input is escaped with a backslash.
+     *
+     * @param input the input String.
+     * @return the translated String.
+     */
+    private String translateWildcards(String input) {
+        StringBuffer translated = new StringBuffer(input.length());
+        // true while a backslash has been read, but not yet written
+        boolean escaped = false;
+        for (int i = 0; i < input.length(); i++) {
+            char c = input.charAt(i);
+            if (c == '\\') {
+                if (escaped) {
+                    translated.append("\\\\");
+                    escaped = false;
+                } else {
+                    escaped = true;
+                }
+            } else if (c == '*') {
+                if (escaped) {
+                    translated.append('*');
+                    escaped = false;
+                } else {
+                    translated.append('%');
+                }
+            } else if (c == '?') {
+                if (escaped) {
+                    translated.append('?');
+                    escaped = false;
+                } else {
+                    translated.append('_');
+                }
+            } else if (c == '%' || c == '_') {
+                // escape every occurrence of '%' and '_'
+                escaped = false;
+                translated.append('\\').append(c);
+            } else {
+                if (escaped) {
+                    translated.append('\\');
+                    escaped = false;
+                }
+                translated.append(c);
+            }
+        }
+        return translated.toString();
+    }
+}
Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitQueryParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java?rev=723728&r1=723727&r2=723728&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LuceneQueryBuilder.java Fri Dec 5 05:06:59 2008
@@ -37,8 +37,6 @@
import org.apache.jackrabbit.core.SearchManager;
import org.apache.jackrabbit.core.SessionImpl;
import org.apache.jackrabbit.core.query.PropertyTypeRegistry;
-import org.apache.jackrabbit.core.query.lucene.fulltext.ParseException;
-import org.apache.jackrabbit.core.query.lucene.fulltext.QueryParser;
import org.apache.jackrabbit.core.state.ItemStateManager;
import org.apache.jackrabbit.spi.Name;
import org.apache.jackrabbit.spi.Path;
@@ -73,6 +71,8 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryParser.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -379,39 +379,9 @@
tmp.append(propName.getLocalName());
fieldname = tmp.toString();
}
- QueryParser parser = new QueryParser(
+ QueryParser parser = new JackrabbitQueryParser(
fieldname, analyzer, synonymProvider);
- parser.setOperator(QueryParser.DEFAULT_OPERATOR_AND);
- // replace escaped ' with just '
- StringBuffer query = new StringBuffer();
- String textsearch = node.getQuery();
- // the default lucene query parser recognizes 'AND' and 'NOT' as
- // keywords.
- textsearch = textsearch.replaceAll("AND", "and");
- textsearch = textsearch.replaceAll("NOT", "not");
- boolean escaped = false;
- for (int i = 0; i < textsearch.length(); i++) {
- if (textsearch.charAt(i) == '\\') {
- if (escaped) {
- query.append("\\\\");
- escaped = false;
- } else {
- escaped = true;
- }
- } else if (textsearch.charAt(i) == '\'') {
- if (escaped) {
- escaped = false;
- }
- query.append(textsearch.charAt(i));
- } else {
- if (escaped) {
- query.append('\\');
- escaped = false;
- }
- query.append(textsearch.charAt(i));
- }
- }
- Query context = parser.parse(query.toString());
+ Query context = parser.parse(node.getQuery());
if (relPath != null && (!node.getReferencesProperty() || relPath.getLength() > 1)) {
// text search on some child axis
Path.Element[] elements = relPath.getElements();