You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by al...@apache.org on 2013/08/12 09:21:10 UTC

svn commit: r1513061 - in /jackrabbit/oak/trunk/oak-lucene: ./ src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/

Author: alexparvulescu
Date: Mon Aug 12 07:21:09 2013
New Revision: 1513061

URL: http://svn.apache.org/r1513061
Log:
OAK-954 Make Lucene analyzer configurable

Modified:
    jackrabbit/oak/trunk/oak-lucene/pom.xml
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProvider.java
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LowCostLuceneIndexProvider.java
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java

Modified: jackrabbit/oak/trunk/oak-lucene/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/pom.xml?rev=1513061&r1=1513060&r2=1513061&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-lucene/pom.xml Mon Aug 12 07:21:09 2013
@@ -36,12 +36,9 @@
     <tika.version>1.3</tika.version>
     <lucene.version>4.3.0</lucene.version>
     <known.issues>
-      org.apache.jackrabbit.core.query.FulltextQueryTest#testFulltextExcludeSQL      <!-- OAK-819 -->
-      org.apache.jackrabbit.core.query.FulltextQueryTest#testFulltextOrSQL           <!-- OAK-819 -->
-      org.apache.jackrabbit.core.query.FulltextQueryTest#testFulltextIntercapSQL     <!-- OAK-819 -->
       org.apache.jackrabbit.core.query.FulltextQueryTest#testContainsPropScopeSQL    <!-- OAK-819 -->
       org.apache.jackrabbit.core.query.FulltextQueryTest#testContainsPropScopeXPath  <!-- OAK-819 -->
-      org.apache.jackrabbit.core.query.FulltextQueryTest#testMultiByte               <!-- OAK-819 -->
+      org.apache.jackrabbit.core.query.FulltextQueryTest#testMultiByte               <!-- OAK-954 -->
       org.apache.jackrabbit.core.query.JoinTest#testJoinWithOR4 <!-- OAK-819 -->
       org.apache.jackrabbit.core.query.JoinTest#testJoinWithOR5 <!-- OAK-819 -->
     </known.issues>

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java?rev=1513061&r1=1513060&r2=1513061&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java Mon Aug 12 07:21:09 2013
@@ -24,10 +24,10 @@ import static org.apache.jackrabbit.oak.
 import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH_SELECTOR;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INDEX_DATA_CHILD_NAME;
-import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_PATH;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_FILE;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_NAME;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_OAK;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_PATH;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.TYPE_LUCENE;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newFulltextTerm;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newPathTerm;
@@ -38,18 +38,17 @@ import static org.apache.lucene.search.B
 
 import java.io.File;
 import java.io.IOException;
+import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.commons.PathUtils;
-import org.apache.jackrabbit.oak.query.ast.FullTextSearchImpl;
 import org.apache.jackrabbit.oak.query.fulltext.FullTextAnd;
 import org.apache.jackrabbit.oak.query.fulltext.FullTextExpression;
 import org.apache.jackrabbit.oak.query.fulltext.FullTextOr;
@@ -64,6 +63,10 @@ import org.apache.jackrabbit.oak.spi.que
 import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.apache.jackrabbit.oak.spi.state.ReadOnlyBuilder;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiFields;
@@ -85,8 +88,6 @@ import org.apache.lucene.store.FSDirecto
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.collect.Lists;
-
 /**
  * Provides a QueryIndex that does lookups against a Lucene-based index
  * 
@@ -127,7 +128,10 @@ public class LuceneIndex implements Full
     private static final Logger LOG = LoggerFactory
             .getLogger(LuceneIndex.class);
 
-    public LuceneIndex() {
+    private final Analyzer analyzer;
+
+    public LuceneIndex(Analyzer analyzer) {
+        this.analyzer = analyzer;
     }
 
     @Override
@@ -137,9 +141,6 @@ public class LuceneIndex implements Full
 
     @Override
     public double getCost(Filter filter, NodeState root) {
-        if (!FullTextSearchImpl.OAK_890_ADVANCED_FT_SEARCH) {
-            return getCostOld(filter, root);
-        }
         if (!isLive(root)) {
             // unusable index
             return Double.POSITIVE_INFINITY;
@@ -168,9 +169,9 @@ public class LuceneIndex implements Full
         // TODO: proper cost calculation
         // we assume this will cause more read operations,
         // as we need to read the node and then the parent
-        return 15;        
+        return 15;
     }
-    
+
     /**
      * Get the set of relative paths of a full-text condition. For example, for
      * the condition "contains(a/b, 'hello') and contains(c/d, 'world'), the set
@@ -187,9 +188,11 @@ public class LuceneIndex implements Full
             // LowCostLuceneIndexProvider which is used for testing
             // TODO if the LowCostLuceneIndexProvider is removed, we should do
             // the following instead:
-            
-            // throw new IllegalStateException("Lucene index is used even when no full-text conditions are used for filter " + filter);
-            
+
+            // throw new
+            // IllegalStateException("Lucene index is used even when no full-text conditions are used for filter "
+            // + filter);
+
             return Collections.emptySet();
         }
         final HashSet<String> relPaths = new HashSet<String>();
@@ -210,23 +213,9 @@ public class LuceneIndex implements Full
                 }
                 return true;
             }
-            
-        });        
+        });
         return relPaths;
     }
-        
-    private double getCostOld(Filter filter, NodeState root) {
-        // TODO: proper cost calculation
-        if (!isLive(root)) {
-            // unusable index
-            return Double.POSITIVE_INFINITY;
-        }
-        if (!filter.getFulltextConditions().isEmpty()) {
-            return 0.5;
-        }
-        // no fulltext, don't use this index
-        return Double.POSITIVE_INFINITY;
-    }
 
     private static boolean isLive(NodeState root) {
         NodeState def = getIndexDef(root);
@@ -301,30 +290,24 @@ public class LuceneIndex implements Full
 
     @Override
     public String getPlan(Filter filter, NodeState root) {
-        if (FullTextSearchImpl.OAK_890_ADVANCED_FT_SEARCH) {
-            FullTextExpression ft = filter.getFullTextConstraint();
-            Set<String> relPaths = getRelativePaths(ft);
-            if (relPaths.size() > 1) {
-                return new MultiLuceneIndex(filter, root, relPaths).getPlan();
-            } 
-            String parent = relPaths.size() == 0 ? "" : relPaths.iterator().next();
-            // we only restrict non-full-text conditions if there is
-            // no relative property in the full-text constraint 
-            boolean nonFullTextConstraints = parent.isEmpty();
-            String plan = getQuery(filter, null, nonFullTextConstraints) + " ft:(" + ft + ")";
-            if (!parent.isEmpty()) {
-                plan += " parent:" + parent;
-            }
-            return plan;
+        FullTextExpression ft = filter.getFullTextConstraint();
+        Set<String> relPaths = getRelativePaths(ft);
+        if (relPaths.size() > 1) {
+            return new MultiLuceneIndex(filter, root, relPaths).getPlan();
+        }
+        String parent = relPaths.size() == 0 ? "" : relPaths.iterator().next();
+        // we only restrict non-full-text conditions if there is
+        // no relative property in the full-text constraint
+        boolean nonFullTextConstraints = parent.isEmpty();
+        String plan = getQuery(filter, null, nonFullTextConstraints, analyzer) + " ft:(" + ft + ")";
+        if (!parent.isEmpty()) {
+            plan += " parent:" + parent;
         }
-        return getQueryOld(filter, null).toString();
+        return plan;
     }
 
     @Override
     public Cursor query(Filter filter, NodeState root) {
-        if (!FullTextSearchImpl.OAK_890_ADVANCED_FT_SEARCH) {
-            return queryOld(filter, root);
-        }
         if (!isLive(root)) {
             throw new IllegalStateException("Lucene index is not live");
         }
@@ -335,7 +318,7 @@ public class LuceneIndex implements Full
         }
         String parent = relPaths.size() == 0 ? "" : relPaths.iterator().next();
         // we only restrict non-full-text conditions if there is
-        // no relative property in the full-text constraint 
+        // no relative property in the full-text constraint
         boolean nonFullTextConstraints = parent.isEmpty();
         Directory directory = newDirectory(root);
         if (directory == null) {
@@ -349,7 +332,8 @@ public class LuceneIndex implements Full
                     IndexSearcher searcher = new IndexSearcher(reader);
                     Collection<String> paths = new ArrayList<String>();
                     HashSet<String> seenPaths = new HashSet<String>();
-                    Query query = getQuery(filter, reader, nonFullTextConstraints);
+                    Query query = getQuery(filter, reader,
+                            nonFullTextConstraints, analyzer);
                     int parentDepth = PathUtils.getDepth(parent);
                     if (query != null) {
                         // OAK-925
@@ -392,53 +376,11 @@ public class LuceneIndex implements Full
                 directory.close();
             }
         } catch (IOException e) {
-			LOG.warn("query via {} failed.", this, e);
-            return Cursors.newPathCursor(Collections.<String> emptySet());
-        }        
-    }
-    
-    private Cursor queryOld(Filter filter, NodeState root) {
-        Directory directory = newDirectory(root);
-        if (directory == null) {
-            return Cursors.newPathCursor(Collections.<String> emptySet());
-        }
-        long s = System.currentTimeMillis();
-        try {
-            try {
-                IndexReader reader = DirectoryReader.open(directory);
-                try {
-                    IndexSearcher searcher = new IndexSearcher(reader);
-                    Collection<String> paths = new ArrayList<String>();
-
-                    Query query = getQueryOld(filter, reader);
-                    if (query != null) {
-                        TopDocs docs = searcher
-                                .search(query, Integer.MAX_VALUE);
-                        for (ScoreDoc doc : docs.scoreDocs) {
-                            String path = reader.document(doc.doc,
-                                    PATH_SELECTOR).get(PATH);
-                            if ("".equals(path)) {
-                                paths.add("/");
-                            } else if (path != null) {
-                                paths.add(path);
-                            }
-                        }
-                    }
-                    LOG.debug("query via {} took {} ms.", this,
-                            System.currentTimeMillis() - s);
-                    return Cursors.newPathCursor(paths);
-                } finally {
-                    reader.close();
-                }
-            } finally {
-                directory.close();
-            }
-        } catch (IOException e) {
-            e.printStackTrace();
+            LOG.warn("query via {} failed.", this, e);
             return Cursors.newPathCursor(Collections.<String> emptySet());
         }
     }
-    
+
     /**
      * Get the Lucene query for the given filter.
      * 
@@ -447,9 +389,11 @@ public class LuceneIndex implements Full
      * @param nonFullTextConstraints whether non-full-text constraints (such a
      *            path, node type, and so on) should be added to the Lucene
      *            query
+     * @param analyzer the Lucene analyzer used for building the fulltext query
      * @return the Lucene query
      */
-    private static Query getQuery(Filter filter, IndexReader reader, boolean nonFullTextConstraints) {
+    private static Query getQuery(Filter filter, IndexReader reader,
+            boolean nonFullTextConstraints, Analyzer analyzer) {
         List<Query> qs = new ArrayList<Query>();
         FullTextExpression ft = filter.getFullTextConstraint();
         if (ft == null) {
@@ -457,7 +401,7 @@ public class LuceneIndex implements Full
             // when using the LowCostLuceneIndexProvider
             // which is used for testing
         } else {
-            qs.add(getFullTextQuery(ft));
+            qs.add(getFullTextQuery(ft, analyzer));
         }
         if (nonFullTextConstraints) {
             addNonFullTextConstraints(qs, filter, reader);
@@ -474,25 +418,9 @@ public class LuceneIndex implements Full
         }
         return bq;
     }
-    
-    private static Query getQueryOld(Filter filter, IndexReader reader) {
-        List<Query> qs = new ArrayList<Query>();
-        addNonFullTextConstraints(qs, filter, reader);
-        addFullTextConstraintsOld(qs, filter);
-        if (qs.size() == 0) {
-            return new MatchAllDocsQuery();
-        }
-        if (qs.size() == 1) {
-            return qs.get(0);
-        }
-        BooleanQuery bq = new BooleanQuery();
-        for (Query q : qs) {
-            bq.add(q, MUST);
-        }
-        return bq;
-    }
-    
-    private static void addNonFullTextConstraints(List<Query> qs, Filter filter, IndexReader reader) {
+
+    private static void addNonFullTextConstraints(List<Query> qs,
+            Filter filter, IndexReader reader) {
         if (!filter.matchesAllTypes()) {
             addNodeTypeConstraints(qs, filter);
         }
@@ -509,7 +437,6 @@ public class LuceneIndex implements Full
             qs.add(new PrefixQuery(newPathTerm(path)));
             break;
         case DIRECT_CHILDREN:
-            // FIXME OAK-420
             if (!path.endsWith("/")) {
                 path += "/";
             }
@@ -598,9 +525,9 @@ public class LuceneIndex implements Full
 
             qs.add(TermRangeQuery.newStringRange(name, first, last,
                     pr.firstIncluding, pr.lastIncluding));
-        }        
+        }
     }
-    
+
     private static void addReferenceConstraint(String uuid, List<Query> qs,
             IndexReader reader) {
         if (reader == null) {
@@ -628,8 +555,8 @@ public class LuceneIndex implements Full
         }
         qs.add(bq);
     }
-    
-    static Query getFullTextQuery(FullTextExpression ft) {
+
+    static Query getFullTextQuery(FullTextExpression ft, final Analyzer analyzer) {
         // a reference to the query, so it can be set in the visitor
         // (a "non-local return")
         final AtomicReference<Query> result = new AtomicReference<Query>();
@@ -640,18 +567,18 @@ public class LuceneIndex implements Full
                 BooleanQuery q = new BooleanQuery();
                 q.setMinimumNumberShouldMatch(1);
                 for (FullTextExpression e : or.list) {
-                    Query x = getFullTextQuery(e);
+                    Query x = getFullTextQuery(e, analyzer);
                     q.add(x, SHOULD);
                 }
                 result.set(q);
-                return true;     
+                return true;
             }
 
             @Override
             public boolean visit(FullTextAnd and) {
                 BooleanQuery q = new BooleanQuery();
                 for (FullTextExpression e : and.list) {
-                    Query x = getFullTextQuery(e);
+                    Query x = getFullTextQuery(e, analyzer);
                     // Lucene can't deal with "must(must_not(x))"
                     if (x instanceof BooleanQuery) {
                         BooleanQuery bq = (BooleanQuery) x;
@@ -663,32 +590,18 @@ public class LuceneIndex implements Full
                     }
                 }
                 result.set(q);
-                return true;     
+                return true;
             }
 
             @Override
             public boolean visit(FullTextTerm term) {
-                Query q;
                 String p = term.getPropertyName();
                 if (p != null && p.indexOf('/') >= 0) {
                     p = PathUtils.getName(p);
                 }
-                // TODO use tokenToQuery(String) if possible
-                String text = term.getText();
-                if (text.indexOf(' ') >= 0) {
-                    PhraseQuery pq = new PhraseQuery();
-                    for (String t : text.split(" ")) {
-                        pq.add(newFulltextTerm(t));
-                    }
-                    q = pq;
-                } else {
-                    // q = new TermQuery(newFulltextTerm(text));
-                    if (!text.endsWith("*")) {
-                        text = text + "*";
-                    }
-                    text = text.toLowerCase();
-                    // TODO if one condition, use wildcard - if multiple, use list of terms
-                    q = new WildcardQuery(newFulltextTerm(text));
+                Query q = tokenToQuery(term.getText(), analyzer);
+                if (q == null) {
+                    return false;
                 }
                 String boost = term.getBoost();
                 if (boost != null) {
@@ -703,55 +616,126 @@ public class LuceneIndex implements Full
                 }
                 return true;
             }
-            
         });
         return result.get();
     }
-    
-    private static void addFullTextConstraintsOld(List<Query> qs, Filter filter) {
-        if (filter.getFulltextConditions() == null
-                || filter.getFulltextConditions().isEmpty()) {
-            return;
+
+    private static Query tokenToQuery(String text, Analyzer analyzer) {
+        if (analyzer == null) {
+            return null;
         }
-        List<String> tokens = Lists.newArrayList();
-        for (String condition : filter.getFulltextConditions()) {
-            tokens.addAll(tokenize(condition.toLowerCase()));
+        List<String> tokens = new ArrayList<String>();
+        tokens = tokenize(text, analyzer);
+
+        if (tokens.isEmpty()) {
+            // TODO what should be returned in the case there are no tokens?
+            return new BooleanQuery();
         }
+
         if (tokens.size() == 1) {
-            String token = tokens.get(0);
-            if (token.contains(" ")) {
-                PhraseQuery pq = new PhraseQuery();
-                for (String t : token.split(" ")) {
-                    pq.add(newFulltextTerm(t));
-                }
-                qs.add(pq);
-            } else {
-                if (!token.endsWith("*")) {
-                    token = token + "*";
+            text = tokens.iterator().next();
+            boolean hasFulltextToken = false;
+            for (char c : fulltextTokens) {
+                if (text.indexOf(c) != -1) {
+                    hasFulltextToken = true;
+                    break;
                 }
-                qs.add(new WildcardQuery(newFulltextTerm(token)));
             }
-            return;
-        }
 
-        BooleanQuery q = new BooleanQuery();
-        Iterator<String> iterator = tokens.iterator();
-        while (iterator.hasNext()) {
-            String token = iterator.next();
-            q.add(tokenToQuery(token), MUST);
-        }
-        qs.add(q);
-    }
-
-    private static Query tokenToQuery(String token) {
-        if (token.contains(" ")) {
+            if (hasFulltextToken) {
+                return new WildcardQuery(newFulltextTerm(text));
+            } else {
+                return new PrefixQuery(newFulltextTerm(text));
+            }
+        } else {
             PhraseQuery pq = new PhraseQuery();
-            for (String t : token.split(" ")) {
+            for (String t : tokens) {
                 pq.add(newFulltextTerm(t));
             }
             return pq;
         }
-        return new TermQuery(newFulltextTerm(token));
+    }
+
+    private static char[] fulltextTokens = new char[] { '*', '?' };
+
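+    /**
+     * Tokenizes the text with the given analyzer and tries to merge back
+     * tokens that were split on fulltext query wildcards ('*' or '?').
+     * 
+     * @param text the fulltext search text to tokenize
+     * @param analyzer the Lucene analyzer used to produce the token stream
+     * @return the list of tokens, or <code>null</code> if reading the token
+     *         stream failed with an IOException
+     */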
+    private static List<String> tokenize(String text, Analyzer analyzer) {
+        List<String> tokens = new ArrayList<String>();
+        TokenStream stream = null;
+        try {
+            stream = analyzer.tokenStream(FieldNames.FULLTEXT,
+                    new StringReader(text));
+            CharTermAttribute termAtt = stream
+                    .addAttribute(CharTermAttribute.class);
+            OffsetAttribute offsetAtt = stream
+                    .addAttribute(OffsetAttribute.class);
+            // TypeAttribute type = stream.addAttribute(TypeAttribute.class);
+
+            stream.reset();
+
+            int poz = 0;
+            boolean hasFulltextToken = false;
+            StringBuilder token = new StringBuilder();
+            while (stream.incrementToken()) {
+                String term = termAtt.toString();
+                int start = offsetAtt.startOffset();
+                int end = offsetAtt.endOffset();
+                if (start > poz) {
+                    for (int i = poz; i < start; i++) {
+                        for (char c : fulltextTokens) {
+                            if (c == text.charAt(i)) {
+                                token.append(c);
+                                hasFulltextToken = true;
+                            }
+                        }
+                    }
+                }
+                poz = end;
+                if (hasFulltextToken) {
+                    token.append(term);
+                } else {
+                    if (token.length() > 0) {
+                        tokens.add(token.toString());
+                    }
+                    token = new StringBuilder();
+                    token.append(term);
+                }
+            }
+            // consume to the end of the string
+            if (poz < text.length()) {
+                for (int i = poz; i < text.length(); i++) {
+                    for (char c : fulltextTokens) {
+                        if (c == text.charAt(i)) {
+                            token.append(c);
+                        }
+                    }
+                }
+            }
+            if (token.length() > 0) {
+                tokens.add(token.toString());
+            }
+            stream.end();
+        } catch (IOException e) {
+            LOG.error("Building fulltext query failed", e.getMessage());
+            return null;
+        } finally {
+            try {
+                if (stream != null) {
+                    stream.close();
+                }
+            } catch (IOException e) {
+                // ignore
+            }
+        }
+        return tokens;
     }
 
     /**

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java?rev=1513061&r1=1513060&r2=1513061&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java Mon Aug 12 07:21:09 2013
@@ -18,6 +18,7 @@ package org.apache.jackrabbit.oak.plugin
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.util.Version;
 
 public interface LuceneIndexConstants {
@@ -28,7 +29,7 @@ public interface LuceneIndexConstants {
 
     Version VERSION = Version.LUCENE_43;
 
-    Analyzer ANALYZER = new StandardAnalyzer(VERSION);
+    Analyzer ANALYZER = new StandardAnalyzer(VERSION, CharArraySet.EMPTY_SET);
 
     /**
      * include only certain property types in the index

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java?rev=1513061&r1=1513060&r2=1513061&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java Mon Aug 12 07:21:09 2013
@@ -38,6 +38,7 @@ import org.apache.jackrabbit.oak.plugins
 import org.apache.jackrabbit.oak.spi.commit.Editor;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.search.PrefixQuery;
@@ -71,11 +72,11 @@ public class LuceneIndexEditor implement
 
     private boolean propertiesChanged = false;
 
-    LuceneIndexEditor(NodeBuilder definition) throws CommitFailedException {
+    LuceneIndexEditor(NodeBuilder definition, Analyzer analyzer) throws CommitFailedException {
         this.parent = null;
         this.name = null;
         this.path = "/";
-        this.context = new LuceneIndexEditorContext(definition);
+        this.context = new LuceneIndexEditorContext(definition, analyzer);
     }
 
     private LuceneIndexEditor(LuceneIndexEditor parent, String name) {

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java?rev=1513061&r1=1513060&r2=1513061&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java Mon Aug 12 07:21:09 2013
@@ -17,7 +17,6 @@
 package org.apache.jackrabbit.oak.plugins.index.lucene;
 
 import static org.apache.jackrabbit.oak.plugins.index.IndexUtils.getString;
-import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.ANALYZER;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INCLUDE_PROPERTY_TYPES;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INDEX_DATA_CHILD_NAME;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PERSISTENCE_PATH;
@@ -33,6 +32,7 @@ import org.apache.jackrabbit.oak.api.Pro
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.plugins.index.lucene.aggregation.NodeAggregator;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.SerialMergeScheduler;
@@ -48,13 +48,13 @@ public class LuceneIndexEditorContext {
     private static final Logger log = LoggerFactory
             .getLogger(LuceneIndexEditorContext.class);
 
-    private static IndexWriterConfig getIndexWriterConfig() {
+    private static IndexWriterConfig getIndexWriterConfig(Analyzer analyzer) {
         // FIXME: Hack needed to make Lucene work in an OSGi environment
         Thread thread = Thread.currentThread();
         ClassLoader loader = thread.getContextClassLoader();
         thread.setContextClassLoader(IndexWriterConfig.class.getClassLoader());
         try {
-            IndexWriterConfig config = new IndexWriterConfig(VERSION, ANALYZER);
+            IndexWriterConfig config = new IndexWriterConfig(VERSION, analyzer);
             config.setMergeScheduler(new SerialMergeScheduler());
             return config;
         } finally {
@@ -88,7 +88,7 @@ public class LuceneIndexEditorContext {
 
     private static final NodeAggregator aggregator = new NodeAggregator();
 
-    private static final IndexWriterConfig config = getIndexWriterConfig();
+    private final IndexWriterConfig config;
 
     private static final Parser parser = new AutoDetectParser();
 
@@ -100,8 +100,9 @@ public class LuceneIndexEditorContext {
 
     private long indexedNodes;
 
-    LuceneIndexEditorContext(NodeBuilder definition) {
+    LuceneIndexEditorContext(NodeBuilder definition, Analyzer analyzer) {
         this.definition = definition;
+        this.config = getIndexWriterConfig(analyzer);
 
         PropertyState ps = definition.getProperty(INCLUDE_PROPERTY_TYPES);
         if (ps != null) {

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java?rev=1513061&r1=1513060&r2=1513061&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java Mon Aug 12 07:21:09 2013
@@ -17,6 +17,7 @@
 package org.apache.jackrabbit.oak.plugins.index.lucene;
 
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.TYPE_LUCENE;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.ANALYZER;
 
 import org.apache.felix.scr.annotations.Component;
 import org.apache.felix.scr.annotations.Service;
@@ -26,6 +27,7 @@ import org.apache.jackrabbit.oak.plugins
 import org.apache.jackrabbit.oak.spi.commit.Editor;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.lucene.analysis.Analyzer;
 
 /**
  * Service that provides Lucene based {@link IndexEditor}s
@@ -38,14 +40,33 @@ import org.apache.jackrabbit.oak.spi.sta
 @Service(IndexEditorProvider.class)
 public class LuceneIndexEditorProvider implements IndexEditorProvider {
 
+    /**
+     * TODO how to inject this in an OSGi friendly way?
+     */
+    private Analyzer analyzer = ANALYZER;
+
     @Override
     public Editor getIndexEditor(
             String type, NodeBuilder definition, NodeState root)
             throws CommitFailedException {
         if (TYPE_LUCENE.equals(type)) {
-            return new LuceneIndexEditor(definition);
+            return new LuceneIndexEditor(definition, analyzer);
         }
         return null;
     }
 
+    /**
+     * sets the default analyzer that will be used at index time
+     */
+    public void setAnalyzer(Analyzer analyzer) {
+        this.analyzer = analyzer;
+    }
+
+    // ----- helper builder method
+
+    public LuceneIndexEditorProvider with(Analyzer analyzer) {
+        this.setAnalyzer(analyzer);
+        return this;
+    }
+
 }

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProvider.java?rev=1513061&r1=1513060&r2=1513061&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProvider.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProvider.java Mon Aug 12 07:21:09 2013
@@ -25,6 +25,7 @@ import org.apache.felix.scr.annotations.
 import org.apache.jackrabbit.oak.spi.query.QueryIndex;
 import org.apache.jackrabbit.oak.spi.query.QueryIndexProvider;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.lucene.analysis.Analyzer;
 
 import com.google.common.collect.ImmutableList;
 
@@ -39,15 +40,33 @@ import com.google.common.collect.Immutab
 public class LuceneIndexProvider implements QueryIndexProvider,
         LuceneIndexConstants {
 
-    @Override @Nonnull
+    /**
+     * TODO how to inject this in an OSGi friendly way?
+     */
+    protected Analyzer analyzer = ANALYZER;
+
+    @Override
+    @Nonnull
     public List<QueryIndex> getQueryIndexes(NodeState nodeState) {
-        return ImmutableList.<QueryIndex>of(newLuceneIndex());
+        return ImmutableList.<QueryIndex> of(newLuceneIndex());
+    }
+
+    protected LuceneIndex newLuceneIndex() {
+        return new LuceneIndex(analyzer);
     }
 
     /**
-     * testing purposes
+     * sets the default analyzer that will be used at query time
      */
-    protected LuceneIndex newLuceneIndex() {
-        return new LuceneIndex();
+    public void setAnalyzer(Analyzer analyzer) {
+        this.analyzer = analyzer;
     }
+
+    // ----- helper builder method
+
+    public LuceneIndexProvider with(Analyzer analyzer) {
+        this.setAnalyzer(analyzer);
+        return this;
+    }
+
 }

Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LowCostLuceneIndexProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LowCostLuceneIndexProvider.java?rev=1513061&r1=1513060&r2=1513061&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LowCostLuceneIndexProvider.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LowCostLuceneIndexProvider.java Mon Aug 12 07:21:09 2013
@@ -18,6 +18,7 @@ package org.apache.jackrabbit.oak.plugin
 
 import org.apache.jackrabbit.oak.spi.query.Filter;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.lucene.analysis.Analyzer;
 
 /**
  * A LuceneIndexProvider that return a LuceneIndex with a really low cost, so
@@ -28,13 +29,20 @@ public class LowCostLuceneIndexProvider 
 
     @Override
     protected LuceneIndex newLuceneIndex() {
-        return new LowCostLuceneIndex();
+        return new LowCostLuceneIndex(analyzer);
+    }
+
+    // ----- helper builder method
+
+    public LowCostLuceneIndexProvider with(Analyzer analyzer) {
+        this.setAnalyzer(analyzer);
+        return this;
     }
 
     private static class LowCostLuceneIndex extends LuceneIndex {
 
-        public LowCostLuceneIndex() {
-            super();
+        public LowCostLuceneIndex(Analyzer analyzer) {
+            super(analyzer);
         }
 
         @Override

Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java?rev=1513061&r1=1513060&r2=1513061&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java Mon Aug 12 07:21:09 2013
@@ -147,4 +147,18 @@ public class LuceneIndexQueryTest extend
 
     }
 
+    @Test
+    public void containsPath() throws Exception {
+        String h = "/p1/p2/p3";
+
+        Tree test = root.getTree("/").addChild("test");
+        test.addChild("a").setProperty("name", h);
+        root.commit();
+
+        StringBuffer stmt = new StringBuffer();
+        stmt.append("//*[jcr:contains(., '/p1/p2')]");
+        assertQuery(stmt.toString(), "xpath", ImmutableList.of("/test/a"));
+
+    }
+
 }

Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java?rev=1513061&r1=1513060&r2=1513061&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexTest.java Mon Aug 12 07:21:09 2013
@@ -41,14 +41,17 @@ import org.apache.jackrabbit.oak.spi.que
 import org.apache.jackrabbit.oak.spi.query.QueryIndex;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.lucene.analysis.Analyzer;
 import org.junit.Test;
 
 import com.google.common.collect.ImmutableSet;
 
 public class LuceneIndexTest {
 
+    private static final Analyzer analyzer = LuceneIndexConstants.ANALYZER;
+
     private static final EditorHook HOOK = new EditorHook(
-            new IndexUpdateProvider(new LuceneIndexEditorProvider()));
+            new IndexUpdateProvider(new LuceneIndexEditorProvider().with(analyzer)));
 
     private NodeState root = new InitialContent().initialize(EMPTY_NODE);
 
@@ -65,7 +68,7 @@ public class LuceneIndexTest {
 
         NodeState indexed = HOOK.processCommit(before, after);
 
-        QueryIndex queryIndex = new LuceneIndex();
+        QueryIndex queryIndex = new LuceneIndex(analyzer);
         FilterImpl filter = createFilter(NT_BASE);
         filter.restrictPath("/", Filter.PathRestriction.EXACT);
         filter.restrictProperty("foo", Operator.EQUAL,
@@ -91,7 +94,7 @@ public class LuceneIndexTest {
 
         NodeState indexed = HOOK.processCommit(before, after);
 
-        QueryIndex queryIndex = new LuceneIndex();
+        QueryIndex queryIndex = new LuceneIndex(analyzer);
         FilterImpl filter = createFilter(NT_BASE);
         // filter.restrictPath("/", Filter.PathRestriction.EXACT);
         filter.restrictProperty("foo", Operator.EQUAL,
@@ -122,7 +125,7 @@ public class LuceneIndexTest {
 
         NodeState indexed = HOOK.processCommit(before, after);
 
-        QueryIndex queryIndex = new LuceneIndex();
+        QueryIndex queryIndex = new LuceneIndex(analyzer);
         FilterImpl filter = createFilter(NT_BASE);
         // filter.restrictPath("/", Filter.PathRestriction.EXACT);
         filter.restrictProperty("foo", Operator.EQUAL,