You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by al...@apache.org on 2013/12/09 21:23:43 UTC
svn commit: r1549667 - in /jackrabbit/oak/trunk/oak-lucene/src:
main/java/org/apache/jackrabbit/oak/plugins/index/lucene/
test/java/org/apache/jackrabbit/oak/plugins/index/lucene/
Author: alexparvulescu
Date: Mon Dec 9 20:23:43 2013
New Revision: 1549667
URL: http://svn.apache.org/r1549667
Log:
OAK-1270 Revisit full-text queries in case of multiple tokens
- introduced a MultiPhraseQuery for the multiple token case
- fixed a bug in the tokenizer
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java?rev=1549667&r1=1549666&r2=1549667&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java Mon Dec 9 20:23:43 2013
@@ -73,10 +73,13 @@ import org.apache.lucene.index.Directory
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
@@ -87,6 +90,9 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -416,7 +422,7 @@ public class LuceneIndex implements Full
// when using the LowCostLuceneIndexProvider
// which is used for testing
} else {
- qs.add(getFullTextQuery(ft, analyzer));
+ qs.add(getFullTextQuery(ft, analyzer, reader));
}
if (nonFullTextConstraints) {
addNonFullTextConstraints(qs, filter, reader);
@@ -582,7 +588,7 @@ public class LuceneIndex implements Full
qs.add(bq);
}
- static Query getFullTextQuery(FullTextExpression ft, final Analyzer analyzer) {
+ static Query getFullTextQuery(FullTextExpression ft, final Analyzer analyzer, final IndexReader reader) {
// a reference to the query, so it can be set in the visitor
// (a "non-local return")
final AtomicReference<Query> result = new AtomicReference<Query>();
@@ -592,7 +598,7 @@ public class LuceneIndex implements Full
public boolean visit(FullTextOr or) {
BooleanQuery q = new BooleanQuery();
for (FullTextExpression e : or.list) {
- Query x = getFullTextQuery(e, analyzer);
+ Query x = getFullTextQuery(e, analyzer, reader);
q.add(x, SHOULD);
}
result.set(q);
@@ -603,7 +609,7 @@ public class LuceneIndex implements Full
public boolean visit(FullTextAnd and) {
BooleanQuery q = new BooleanQuery();
for (FullTextExpression e : and.list) {
- Query x = getFullTextQuery(e, analyzer);
+ Query x = getFullTextQuery(e, analyzer, reader);
// Lucene can't deal with "must(must_not(x))"
if (x instanceof BooleanQuery) {
BooleanQuery bq = (BooleanQuery) x;
@@ -625,7 +631,7 @@ public class LuceneIndex implements Full
// do not add constraints on child nodes properties
p = "*";
}
- Query q = tokenToQuery(term.getText(), analyzer);
+ Query q = tokenToQuery(term.getText(), analyzer, reader);
if (q == null) {
return false;
}
@@ -646,7 +652,7 @@ public class LuceneIndex implements Full
return result.get();
}
- static Query tokenToQuery(String text, Analyzer analyzer) {
+ static Query tokenToQuery(String text, Analyzer analyzer, IndexReader reader) {
if (analyzer == null) {
return null;
}
@@ -657,29 +663,76 @@ public class LuceneIndex implements Full
// TODO what should be returned in the case there are no tokens?
return new BooleanQuery();
}
-
if (tokens.size() == 1) {
- text = tokens.iterator().next();
- boolean hasFulltextToken = false;
- for (char c : fulltextTokens) {
- if (text.indexOf(c) != -1) {
- hasFulltextToken = true;
- break;
+ String token = tokens.iterator().next();
+ if (hasFulltextToken(token)) {
+ return new WildcardQuery(newFulltextTerm(token));
+ } else {
+ return new TermQuery(newFulltextTerm(token));
+ }
+ } else {
+ if (hasFulltextToken(tokens)) {
+ MultiPhraseQuery mpq = new MultiPhraseQuery();
+ for(String token: tokens){
+ if (hasFulltextToken(token)) {
+ Term[] terms = extractMatchingTokens(reader, token);
+ if (terms != null && terms.length > 0) {
+ mpq.add(terms);
+ }
+ } else {
+ mpq.add(newFulltextTerm(token));
+ }
}
+ return mpq;
+ } else {
+ PhraseQuery pq = new PhraseQuery();
+ for (String t : tokens) {
+ pq.add(newFulltextTerm(t));
+ }
+ return pq;
}
+ }
+ }
- if (hasFulltextToken) {
- return new WildcardQuery(newFulltextTerm(text));
- } else {
- return new TermQuery(newFulltextTerm(text));
+ /**
+ * Expands a wildcard token into the terms of the full-text field that match it.
+ *
+ * @param reader the index reader whose terms are enumerated; may be {@code null}
+ * (the "getPlan call" case), in which case nothing can be expanded
+ * @param token the token containing wildcard characters
+ * @return the matching terms (possibly empty), or {@code null} when no reader is
+ * available or reading the index failed
+ */
+ private static Term[] extractMatchingTokens(IndexReader reader, String token) {
+ if (reader == null) {
+ // getPlan call
+ return null;
+ }
+
+ try {
+ List<Term> terms = new ArrayList<Term>();
+ Terms t = MultiFields.getTerms(reader, FieldNames.FULLTEXT);
+ if (t == null) {
+ // the full-text field has no terms in this index, so nothing can match
+ return new Term[0];
+ }
+ Automaton a = WildcardQuery.toAutomaton(newFulltextTerm(token));
+ CompiledAutomaton ca = new CompiledAutomaton(a);
+ TermsEnum te = ca.getTermsEnum(t);
+ BytesRef text;
+ while ((text = te.next()) != null) {
+ terms.add(newFulltextTerm(text.utf8ToString()));
}
- } else {
- PhraseQuery pq = new PhraseQuery();
- for (String t : tokens) {
- pq.add(newFulltextTerm(t));
+ return terms.toArray(new Term[terms.size()]);
+ } catch (IOException e) {
+ // hand the exception itself to the logger: the message has no '{}'
+ // placeholder, so a plain string argument would be silently dropped
+ LOG.error("Building fulltext query failed", e);
+ return null;
+ }
+ }
+
+ /**
+ * Tells whether at least one of the given tokens contains a wildcard character.
+ *
+ * @param tokens the tokens to inspect
+ * @return {@code true} if any token passes the single-token check, {@code false} otherwise
+ */
+ private static boolean hasFulltextToken(List<String> tokens) {
+ boolean found = false;
+ for (String candidate : tokens) {
+ if (hasFulltextToken(candidate)) {
+ // one hit is enough, stop scanning
+ found = true;
+ break;
+ }
+ }
+ return found;
+ }
+
+ /**
+ * Tells whether the token contains any of the characters listed in
+ * {@code fulltextTokens}.
+ *
+ * @param token the token to inspect
+ * @return {@code true} if one of those characters occurs in the token
+ */
+ private static boolean hasFulltextToken(String token) {
+ for (char c : fulltextTokens) {
+ if (token.indexOf(c) != -1) {
+ return true;
}
- return pq;
}
+ return false;
}
private static char[] fulltextTokens = new char[] { '*', '?' };
@@ -727,6 +780,7 @@ public class LuceneIndex implements Full
poz = end;
if (hasFulltextToken) {
token.append(term);
+ hasFulltextToken = false;
} else {
if (token.length() > 0) {
tokens.add(token.toString());
Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java?rev=1549667&r1=1549666&r2=1549667&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java Mon Dec 9 20:23:43 2013
@@ -148,6 +148,21 @@ public class LuceneIndexQueryTest extend
}
@Test
+ public void containsDash() throws Exception {
+ // three sibling nodes whose "name" values share the "hello" prefix
+ Tree parent = root.getTree("/").addChild("test");
+ parent.addChild("a").setProperty("name", "hello-wor");
+ parent.addChild("b").setProperty("name", "hello-world");
+ parent.addChild("c").setProperty("name", "hello");
+ root.commit();
+
+ // both statements are expected to return the same two nodes
+ String[] statements = {
+ "/jcr:root//*[jcr:contains(., 'hello-wor*')]",
+ "/jcr:root//*[jcr:contains(., '*hello-wor*')]" };
+ for (String statement : statements) {
+ assertQuery(statement, "xpath",
+ ImmutableList.of("/test/a", "/test/b"));
+ }
+
+ }
+
+ @Test
public void containsPath() throws Exception {
Tree test = root.getTree("/").addChild("test");
Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java?rev=1549667&r1=1549666&r2=1549667&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java Mon Dec 9 20:23:43 2013
@@ -162,6 +162,15 @@ public class LuceneIndexTest {
LuceneIndex.tokenize("first. second", analyzer));
assertEquals(ImmutableList.of("first", "second"),
LuceneIndex.tokenize("first.second", analyzer));
+
+ assertEquals(ImmutableList.of("hello", "world"),
+ LuceneIndex.tokenize("hello-world", analyzer));
+ assertEquals(ImmutableList.of("hello", "wor*"),
+ LuceneIndex.tokenize("hello-wor*", analyzer));
+ assertEquals(ImmutableList.of("*llo", "world"),
+ LuceneIndex.tokenize("*llo-world", analyzer));
+ assertEquals(ImmutableList.of("*llo", "wor*"),
+ LuceneIndex.tokenize("*llo-wor*", analyzer));
}
}