You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by th...@apache.org on 2020/03/04 13:10:12 UTC

svn commit: r1874786 - in /jackrabbit/oak/trunk: oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/ oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/ oak-search/src/main/java/org/apache/jackrabbit/oak...

Author: thomasm
Date: Wed Mar  4 13:10:12 2020
New Revision: 1874786

URL: http://svn.apache.org/viewvc?rev=1874786&view=rev
Log:
OAK-8934 Indexing: filter entries with a regular expression

Modified:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java?rev=1874786&r1=1874785&r2=1874786&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java Wed Mar  4 13:10:12 2020
@@ -21,6 +21,7 @@ package org.apache.jackrabbit.oak.plugin
 
 import java.io.IOException;
 import java.util.Calendar;
+import java.util.Iterator;
 import java.util.List;
 
 import com.google.common.io.Closer;
@@ -30,6 +31,7 @@ import org.apache.jackrabbit.oak.commons
 import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.lucene.directory.DirectoryFactory;
 import org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper;
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
 import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.util.ISO8601;
@@ -80,10 +82,23 @@ class DefaultIndexWriter implements Luce
 
     @Override
     public void updateDocument(String path, Iterable<? extends IndexableField> doc) throws IOException {
+        Iterator<? extends IndexableField> f = doc.iterator();
+        String fieldName = f.hasNext() ? f.next().name() : null;
+        boolean containsOnlyPath = FieldNames.PATH.equals(fieldName) && !f.hasNext();
+        boolean isPropertyRegexMatchingEnabled = definition.getPropertyRegex() != null;
         if (reindex) {
+            if (containsOnlyPath && isPropertyRegexMatchingEnabled) {
+                return; // path-only document: skip it without marking the index as updated
+            }
             getWriter().addDocument(doc);
         } else {
-            getWriter().updateDocument(newPathTerm(path), doc);
+            // If the new document contains only the path field, do not add it to the index. Instead, delete the
+            // existing document with the same path.
+            if (containsOnlyPath && isPropertyRegexMatchingEnabled) {
+                getWriter().deleteDocuments(newPathTerm(path));
+            } else {
+                getWriter().updateDocument(newPathTerm(path), doc);
+            }
         }
         indexUpdated = true;
     }

Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java?rev=1874786&r1=1874785&r2=1874786&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java Wed Mar  4 13:10:12 2020
@@ -22,7 +22,9 @@ import static org.apache.jackrabbit.JcrC
 import static org.apache.jackrabbit.JcrConstants.NT_UNSTRUCTURED;
 import static org.apache.jackrabbit.oak.api.Type.STRING;
 import static org.apache.jackrabbit.oak.api.Type.STRINGS;
+import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.REINDEX_PROPERTY_NAME;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.TestUtil.useV2;
+import static org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants.PROP_VALUE_REGEX;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
@@ -55,10 +57,12 @@ import com.google.common.collect.Immutab
  */
 public class LuceneIndexQueryTest extends AbstractQueryTest {
 
+    private Tree indexDefn;
+
     @Override
     protected void createTestIndexNode() throws Exception {
         Tree index = root.getTree("/");
-        Tree indexDefn = createTestIndexNode(index, LuceneIndexConstants.TYPE_LUCENE);
+        indexDefn = createTestIndexNode(index, LuceneIndexConstants.TYPE_LUCENE);
         useV2(indexDefn);
         indexDefn.setProperty(LuceneIndexConstants.TEST_MODE, true);
         indexDefn.setProperty(FulltextIndexConstants.EVALUATE_PATH_RESTRICTION, true);
@@ -106,6 +110,36 @@ public class LuceneIndexQueryTest extend
     }
 
     @Test
+    public void testValueRegex() throws Exception {
+        Tree test = root.getTree("/").addChild("test");
+        Tree a = test.addChild("a");
+        Tree b = test.addChild("b");
+        a.setProperty("name", "hello");
+        b.setProperty("name", "hello pattern");
+        root.commit();
+
+        final String query = "select [jcr:path] from [nt:base] where isdescendantnode('/test') and contains(*, 'hello')";
+        // Without a value regex both nodes match; the query has no ORDER BY, so do not rely on result order.
+        Iterator<String> result = executeQuery(query, "JCR-SQL2").iterator();
+        List<String> paths = new ArrayList<>();
+        result.forEachRemaining(paths::add);
+        assertEquals(2, paths.size());
+        assertTrue(paths.contains(a.getPath()));
+        assertTrue(paths.contains(b.getPath()));
+
+        // "pat*" is a regex, not a glob: it matches "pa" followed by any number of "t".
+        indexDefn.setProperty(PROP_VALUE_REGEX, "pat*");
+        indexDefn.setProperty(REINDEX_PROPERTY_NAME, true);
+        root.commit();
+
+        result = executeQuery(query, "JCR-SQL2").iterator();
+        paths.clear();
+        result.forEachRemaining(paths::add);
+        assertEquals(1, paths.size());
+        assertEquals(b.getPath(), paths.get(0));
+    }
+
+    @Test
     public void descendantTest() throws Exception {
         Tree test = root.getTree("/").addChild("test");
         test.addChild("a");

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java?rev=1874786&r1=1874785&r2=1874786&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java Wed Mar  4 13:10:12 2020
@@ -51,7 +51,7 @@ public interface FulltextIndexConstants
 
     String PERSISTENCE_OAK = "repository";
 
-  String TEST_MODE = "testMode";
+    String TEST_MODE = "testMode";
 
     String PERSISTENCE_FILE = "file";
 
@@ -152,6 +152,13 @@ public interface FulltextIndexConstants
     String EVALUATE_PATH_RESTRICTION = "evaluatePathRestrictions";
 
     /**
+     * Name of the index definition property that specifies a regular expression for property values. If this
+     * property is present in the index definition, only properties whose value matches the regular expression
+     * are added to the index.
+     */
+    String PROP_VALUE_REGEX = "valueRegex";
+
+    /**
      * Experimental config to restrict which property type gets indexed at
      * property definition level. Mostly index rule level #INCLUDE_PROPERTY_TYPES
      * should be sufficient
@@ -206,7 +213,7 @@ public interface FulltextIndexConstants
     String TIKA_MAX_EXTRACT_LENGTH = "maxExtractLength";
 
     /**
-     *  Config node under tika which defines mime type mappings
+     * Config node under tika which defines mime type mappings
      */
     String TIKA_MIME_TYPES = "mimeTypes";
 
@@ -368,5 +375,5 @@ public interface FulltextIndexConstants
      * index is used for queries; otherwise, it is not used (returns infinite
      * cost). The value is: nodes, the path. For properties, the path of the node, then '@' property.
      */
-     String USE_IF_EXISTS = "useIfExists";
+    String USE_IF_EXISTS = "useIfExists";
 }

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java?rev=1874786&r1=1874785&r2=1874786&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java Wed Mar  4 13:10:12 2020
@@ -261,6 +261,11 @@ public class IndexDefinition implements
 
     private final boolean testMode;
 
+    /**
+     * See {@link FulltextIndexConstants#PROP_VALUE_REGEX}
+     */
+    private final Pattern propertyRegex;
+
     public boolean isTestMode() {
         return testMode;
     }
@@ -371,7 +376,11 @@ public class IndexDefinition implements
 
             this.fullTextEnabled = hasFulltextEnabledIndexRule(definedIndexRules);
             this.evaluatePathRestrictions = getOptionalValue(defn, EVALUATE_PATH_RESTRICTION, false);
-
+            if (defn.hasProperty(PROP_VALUE_REGEX)) {
+                this.propertyRegex = Pattern.compile(getOptionalValue(defn, PROP_VALUE_REGEX, ""));
+            } else {
+                this.propertyRegex = null;
+            }
             String functionName = getOptionalValue(defn, FulltextIndexConstants.FUNC_NAME, null);
             if (fullTextEnabled && functionName == null) {
                 functionName = getDefaultFunctionName();
@@ -844,6 +853,10 @@ public class IndexDefinition implements
         return false;
     }
 
+    public Pattern getPropertyRegex() { // compiled PROP_VALUE_REGEX, or null if the definition has no such property
+        return propertyRegex;
+    }
+
     public boolean isSuggestEnabled() {
         return suggestEnabled;
     }

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java?rev=1874786&r1=1874785&r2=1874786&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java Wed Mar  4 13:10:12 2020
@@ -24,6 +24,7 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.regex.Pattern;
 
 import javax.jcr.PropertyType;
 
@@ -187,7 +188,11 @@ public abstract class FulltextDocumentMa
         }
 
         if (indexingRule.isFulltextEnabled()) {
-            indexFulltextValue(document, name);
+            Pattern propertyRegex = definition.getPropertyRegex();
+            boolean shouldAdd = propertyRegex == null || propertyRegex.matcher(name).find();
+            if (shouldAdd) {
+                indexFulltextValue(document, name);
+            }
         }
 
         if (definition.evaluatePathRestrictions()){
@@ -242,8 +247,10 @@ public abstract class FulltextDocumentMa
 
             if (pd.fulltextEnabled() && includeTypeForFullText) {
                 for (String value : property.getValue(Type.STRINGS)) {
-
                     logLargeStringProperties(property.getName(), value);
+                    if (definition.getPropertyRegex() != null && !definition.getPropertyRegex().matcher(value).find()) {
+                        continue;
+                    }
                     if (!includePropertyValue(value, pd)){
                         continue;
                     }