You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by al...@apache.org on 2015/07/01 14:58:42 UTC
svn commit: r1688636 - /jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
Author: alexparvulescu
Date: Wed Jul 1 12:58:41 2015
New Revision: 1688636
URL: http://svn.apache.org/r1688636
Log:
OAK-2934 Certain searches cause lucene index to hit OutOfMemoryError
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java?rev=1688636&r1=1688635&r2=1688636&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java Wed Jul 1 12:58:41 2015
@@ -31,7 +31,6 @@ import static org.apache.jackrabbit.oak.
import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm;
import static org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.skipTokenization;
import static org.apache.jackrabbit.oak.query.QueryImpl.JCR_PATH;
-import static org.apache.jackrabbit.oak.spi.query.QueryIndex.AdvanceFulltextQueryIndex;
import static org.apache.lucene.search.BooleanClause.Occur.MUST;
import static org.apache.lucene.search.BooleanClause.Occur.MUST_NOT;
import static org.apache.lucene.search.BooleanClause.Occur.SHOULD;
@@ -48,12 +47,6 @@ import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
-
-import com.google.common.collect.AbstractIterator;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Queues;
-import com.google.common.collect.Sets;
-
import org.apache.jackrabbit.JcrConstants;
import org.apache.jackrabbit.oak.api.PropertyValue;
import org.apache.jackrabbit.oak.api.Result.SizePrecision;
@@ -77,6 +70,7 @@ import org.apache.jackrabbit.oak.spi.que
import org.apache.jackrabbit.oak.spi.query.IndexRow;
import org.apache.jackrabbit.oak.spi.query.PropertyValues;
import org.apache.jackrabbit.oak.spi.query.QueryIndex;
+import org.apache.jackrabbit.oak.spi.query.QueryIndex.AdvanceFulltextQueryIndex;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@@ -88,15 +82,13 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
@@ -108,13 +100,15 @@ import org.apache.lucene.search.TotalHit
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.suggest.Lookup;
-import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.google.common.collect.AbstractIterator;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Queues;
+import com.google.common.collect.Sets;
+
/**
* Provides a QueryIndex that does lookups against a Lucene-based index
*
@@ -912,18 +906,15 @@ public class LuceneIndex implements Adva
}
} else {
if (hasFulltextToken(tokens)) {
- MultiPhraseQuery mpq = new MultiPhraseQuery();
+ BooleanQuery bq = new BooleanQuery();
for(String token: tokens){
if (hasFulltextToken(token)) {
- Term[] terms = extractMatchingTokens(reader, fieldName, token);
- if (terms != null && terms.length > 0) {
- mpq.add(terms);
- }
+ bq.add(new WildcardQuery(newFulltextTerm(token, fieldName)), Occur.MUST);
} else {
- mpq.add(newFulltextTerm(token, fieldName));
+ bq.add(new TermQuery(newFulltextTerm(token, fieldName)), Occur.MUST);
}
}
- return mpq;
+ return bq;
} else {
PhraseQuery pq = new PhraseQuery();
for (String t : tokens) {
@@ -934,36 +925,6 @@ public class LuceneIndex implements Adva
}
}
- private static Term[] extractMatchingTokens(IndexReader reader, String fieldName, String token) {
- if (reader == null) {
- // getPlan call
- return null;
- }
-
- try {
- List<Term> terms = new ArrayList<Term>();
- Term onTerm = newFulltextTerm(token, fieldName);
- Terms t = MultiFields.getTerms(reader, onTerm.field());
-
- //No existing field with given name indexed so no possible term values
- if (t == null){
- return new Term[0];
- }
-
- Automaton a = WildcardQuery.toAutomaton(onTerm);
- CompiledAutomaton ca = new CompiledAutomaton(a);
- TermsEnum te = ca.getTermsEnum(t);
- BytesRef text;
- while ((text = te.next()) != null) {
- terms.add(newFulltextTerm(text.utf8ToString(), fieldName));
- }
- return terms.toArray(new Term[terms.size()]);
- } catch (IOException e) {
- LOG.error("Building fulltext query failed", e.getMessage());
- return null;
- }
- }
-
private static boolean hasFulltextToken(List<String> tokens) {
for (String token : tokens) {
if (hasFulltextToken(token)) {