You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ca...@apache.org on 2016/07/04 20:29:54 UTC

svn commit: r1751376 - in /jackrabbit/oak/trunk/oak-lucene/src: main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ test/java/org/apache/jackrabbit/oak/plugins/index/lucene/

Author: catholicon
Date: Mon Jul  4 20:29:54 2016
New Revision: 1751376

URL: http://svn.apache.org/viewvc?rev=1751376&view=rev
Log:
OAK-4516: Configurable option in lucene index defs to index the original (unanalyzed) value as well

"indexOriginalTerm" can now be set to true on the analyzers node so that the original term is also indexed as is

Modified:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java?rev=1751376&r1=1751375&r2=1751376&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java Mon Jul  4 20:29:54 2016
@@ -75,6 +75,7 @@ import static com.google.common.collect.
 import static com.google.common.collect.Sets.newHashSet;
 import static org.apache.jackrabbit.JcrConstants.JCR_SCORE;
 import static org.apache.jackrabbit.JcrConstants.NT_BASE;
+import static org.apache.jackrabbit.oak.api.Type.BOOLEAN;
 import static org.apache.jackrabbit.oak.api.Type.NAMES;
 import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath;
 import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.DECLARING_NODE_TYPES;
@@ -469,10 +470,16 @@ public final class IndexDefinition imple
     private static Map<String, Analyzer> collectAnalyzers(NodeState defn) {
         Map<String, Analyzer> analyzerMap = newHashMap();
         NodeStateAnalyzerFactory factory = new NodeStateAnalyzerFactory(LuceneIndexConstants.VERSION);
-        for (ChildNodeEntry cne : defn.getChildNode(LuceneIndexConstants.ANALYZERS).getChildNodeEntries()) {
+        NodeState analyzersTree = defn.getChildNode(LuceneIndexConstants.ANALYZERS);
+        for (ChildNodeEntry cne : analyzersTree.getChildNodeEntries()) {
             Analyzer a = factory.createInstance(cne.getNodeState());
             analyzerMap.put(cne.getName(), a);
         }
+
+        if (getOptionalValue(analyzersTree, INDEX_ORIGINAL_TERM, false) && !analyzerMap.containsKey(ANL_DEFAULT)) {
+            analyzerMap.put(ANL_DEFAULT, new OakAnalyzer(VERSION, true));
+        }
+
         return ImmutableMap.copyOf(analyzerMap);
     }
 

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java?rev=1751376&r1=1751375&r2=1751376&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java Mon Jul  4 20:29:54 2016
@@ -196,6 +196,14 @@ public interface LuceneIndexConstants {
     String COST_PER_EXECUTION = "costPerExecution";
 
     /**
+     * Boolean property indicating if in-built analyzer should preserve original term
+     * (i.e. use
+     * {@link org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter#PRESERVE_ORIGINAL}
+     * flag)
+     */
+    String INDEX_ORIGINAL_TERM = "indexOriginalTerm";
+
+    /**
      * Node name under which various analyzers are configured
      */
     String ANALYZERS = "analyzers";

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java?rev=1751376&r1=1751375&r2=1751376&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java Mon Jul  4 20:29:54 2016
@@ -29,6 +29,8 @@ public class OakAnalyzer extends Analyze
 
     private final Version matchVersion;
 
+    private final int INDEX_ORIGINAL_TERM;
+
     /**
      * Creates a new {@link OakAnalyzer}
      * 
@@ -37,7 +39,20 @@ public class OakAnalyzer extends Analyze
      *            {@link #matchVersion above}
      */
     public OakAnalyzer(Version matchVersion) {
+        this(matchVersion, false);
+    }
+
+    /**
+     * Create a new {@link OakAnalyzer} with configurable flag to preserve
+     * original term being analyzed too.
+     * @param matchVersion Lucene version to match. See {@link #matchVersion above}
+     * @param indexOriginalTerm flag to setup analyzer such that
+     *                              {@link WordDelimiterFilter#PRESERVE_ORIGINAL}
+     *                              is set to configure the word delimiter
+     */
+    public OakAnalyzer(Version matchVersion, boolean indexOriginalTerm) {
         this.matchVersion = matchVersion;
+        INDEX_ORIGINAL_TERM = indexOriginalTerm?WordDelimiterFilter.PRESERVE_ORIGINAL:0;
     }
 
     @Override
@@ -48,6 +63,7 @@ public class OakAnalyzer extends Analyze
         tok = new WordDelimiterFilter(tok,
                 WordDelimiterFilter.GENERATE_WORD_PARTS
                         | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE
+                        | this.INDEX_ORIGINAL_TERM
                         | WordDelimiterFilter.GENERATE_NUMBER_PARTS, null);
         return new TokenStreamComponents(src, tok);
     }

Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java?rev=1751376&r1=1751375&r2=1751376&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java Mon Jul  4 20:29:54 2016
@@ -40,12 +40,17 @@ import static org.apache.jackrabbit.oak.
 import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME;
 import static org.apache.jackrabbit.oak.plugins.index.PathFilter.PROP_EXCLUDED_PATHS;
 import static org.apache.jackrabbit.oak.plugins.index.PathFilter.PROP_INCLUDED_PATHS;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.ANALYZERS;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INCLUDE_PROPERTY_NAMES;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.ORDERED_PROP_NAMES;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INDEX_ORIGINAL_TERM;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PROPDEF_PROP_NODE_NAME;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PROP_ANALYZED;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PROP_NAME;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PROP_NODE;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PROP_NODE_SCOPE_INDEX;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PROP_PROPERTY_INDEX;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PROP_TYPE;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.TIKA;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorTest.createCal;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.TestUtil.newNodeAggregator;
@@ -755,6 +760,60 @@ public class LucenePropertyIndexTest ext
         assertQuery("select [jcr:path] from [nt:base] where [propa] = 10", asList("/test/c", "/test/f"));
     }
 
+    //OAK-4516
+    @Test
+    public void wildcardQueryToLookupUnanalyzedText() throws Exception {
+        Tree idx = createIndex("test1", of("propa", "propb"));
+        idx.setProperty(PROP_TYPE, "lucene");
+        idx.addChild(ANALYZERS).setProperty(INDEX_ORIGINAL_TERM, true);
+        useV2(idx);
+        //Do not provide type information
+        root.commit();
+
+        //setup propa def to be analyzed
+        Tree propTree = root.getTree(idx.getPath() + "/indexRules/nt:base/properties/propa");
+        propTree.setProperty(PROP_ANALYZED, true);
+        root.commit();
+
+        //set propb def to be node scope indexed
+        propTree = root.getTree(idx.getPath() + "/indexRules/nt:base/properties/propb");
+        propTree.setProperty(PROP_NODE_SCOPE_INDEX, true);
+        root.commit();
+
+        Tree rootTree = root.getTree("/");
+        Tree node1Tree = rootTree.addChild("node1");
+        node1Tree.setProperty("propa", "abcdef");
+        node1Tree.setProperty("propb", "abcdef");
+        Tree node2Tree = rootTree.addChild("node2");
+        node2Tree.setProperty("propa", "abc_def");
+        node2Tree.setProperty("propb", "abc_def");
+        root.commit();
+
+        //normal query still works
+        String query = "select [jcr:path] from [nt:base] where contains('propa', 'abc*')";
+        String explanation = explain(query);
+        assertThat(explanation, containsString("lucene:test1"));
+        assertQuery(query, asList("/node1", "/node2"));
+
+        //unanalyzed wild-card query can still match original term
+        query = "select [jcr:path] from [nt:base] where contains('propa', 'abc_d*')";
+        explanation = explain(query);
+        assertThat(explanation, containsString("lucene:test1"));
+        assertQuery(query, asList("/node2"));
+
+        //normal query still works
+        query = "select [jcr:path] from [nt:base] where contains(*, 'abc*')";
+        explanation = explain(query);
+        assertThat(explanation, containsString("lucene:test1"));
+        assertQuery(query, asList("/node1", "/node2"));
+
+        //unanalyzed wild-card query can still match original term
+        query = "select [jcr:path] from [nt:base] where contains(*, 'abc_d*')";
+        explanation = explain(query);
+        assertThat(explanation, containsString("lucene:test1"));
+        assertQuery(query, asList("/node2"));
+    }
+
     //OAK-4517
     @Test
     public void pathIncludeSubrootIndex() throws Exception {