You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by th...@apache.org on 2020/03/04 13:10:12 UTC
svn commit: r1874786 - in /jackrabbit/oak/trunk:
oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/
oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/
oak-search/src/main/java/org/apache/jackrabbit/oak...
Author: thomasm
Date: Wed Mar 4 13:10:12 2020
New Revision: 1874786
URL: http://svn.apache.org/viewvc?rev=1874786&view=rev
Log:
OAK-8934 Indexing: filter entries with a regular expression
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java?rev=1874786&r1=1874785&r2=1874786&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/DefaultIndexWriter.java Wed Mar 4 13:10:12 2020
@@ -21,6 +21,7 @@ package org.apache.jackrabbit.oak.plugin
import java.io.IOException;
import java.util.Calendar;
+import java.util.Iterator;
import java.util.List;
import com.google.common.io.Closer;
@@ -30,6 +31,7 @@ import org.apache.jackrabbit.oak.commons
import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.lucene.directory.DirectoryFactory;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper;
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.util.ISO8601;
@@ -80,10 +82,23 @@ class DefaultIndexWriter implements Luce
@Override
public void updateDocument(String path, Iterable<? extends IndexableField> doc) throws IOException {
+ Iterator<? extends IndexableField> f = doc.iterator();
+ String fieldName = f.hasNext() ? f.next().name() : null;
+ boolean containsOnlyPath = FieldNames.PATH.equals(fieldName) && !f.hasNext();
+ boolean isPropertyRegexMatchingEnabled = definition.getPropertyRegex() != null;
if (reindex) {
+ if (containsOnlyPath && isPropertyRegexMatchingEnabled) {
+ return;
+ }
getWriter().addDocument(doc);
} else {
- getWriter().updateDocument(newPathTerm(path), doc);
+ // If the new document contains only the path field, we don't add it to the index. Instead, we delete the
+ // existing document with the same path.
+ if (containsOnlyPath && isPropertyRegexMatchingEnabled) {
+ getWriter().deleteDocuments(newPathTerm(path));
+ } else {
+ getWriter().updateDocument(newPathTerm(path), doc);
+ }
}
indexUpdated = true;
}
Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java?rev=1874786&r1=1874785&r2=1874786&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java Wed Mar 4 13:10:12 2020
@@ -22,7 +22,9 @@ import static org.apache.jackrabbit.JcrC
import static org.apache.jackrabbit.JcrConstants.NT_UNSTRUCTURED;
import static org.apache.jackrabbit.oak.api.Type.STRING;
import static org.apache.jackrabbit.oak.api.Type.STRINGS;
+import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.REINDEX_PROPERTY_NAME;
import static org.apache.jackrabbit.oak.plugins.index.lucene.TestUtil.useV2;
+import static org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants.PROP_VALUE_REGEX;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@@ -55,10 +57,12 @@ import com.google.common.collect.Immutab
*/
public class LuceneIndexQueryTest extends AbstractQueryTest {
+ private Tree indexDefn;
+
@Override
protected void createTestIndexNode() throws Exception {
Tree index = root.getTree("/");
- Tree indexDefn = createTestIndexNode(index, LuceneIndexConstants.TYPE_LUCENE);
+ indexDefn = createTestIndexNode(index, LuceneIndexConstants.TYPE_LUCENE);
useV2(indexDefn);
indexDefn.setProperty(LuceneIndexConstants.TEST_MODE, true);
indexDefn.setProperty(FulltextIndexConstants.EVALUATE_PATH_RESTRICTION, true);
@@ -106,6 +110,36 @@ public class LuceneIndexQueryTest extend
}
@Test
+ public void testValueRegex() throws Exception {
+ Tree test = root.getTree("/").addChild("test");
+ Tree a = test.addChild("a");
+ Tree b = test.addChild("b");
+ a.setProperty("name", "hello");
+ b.setProperty("name", "hello pattern");
+ root.commit();
+
+ final String query = "select [jcr:path] from [nt:base] where isdescendantnode('/test') and contains(*, 'hello')";
+
+ Iterator<String> result = executeQuery(query,"JCR-SQL2").iterator();
+ List<String> paths = new ArrayList<>();
+ result.forEachRemaining(paths::add);
+ assertEquals(2, paths.size());
+ assertEquals(paths.get(0), a.getPath());
+ assertEquals(paths.get(1), b.getPath());
+
+ indexDefn.setProperty(PROP_VALUE_REGEX, "pat*");
+ indexDefn.setProperty(REINDEX_PROPERTY_NAME, true);
+ root.commit();
+
+ result = executeQuery(query,"JCR-SQL2").iterator();
+ paths.clear();
+ result.forEachRemaining(paths::add);
+ assertEquals(1, paths.size());
+ assertEquals(paths.get(0), b.getPath());
+
+ }
+
+ @Test
public void descendantTest() throws Exception {
Tree test = root.getTree("/").addChild("test");
test.addChild("a");
Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java?rev=1874786&r1=1874785&r2=1874786&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java Wed Mar 4 13:10:12 2020
@@ -51,7 +51,7 @@ public interface FulltextIndexConstants
String PERSISTENCE_OAK = "repository";
- String TEST_MODE = "testMode";
+ String TEST_MODE = "testMode";
String PERSISTENCE_FILE = "file";
@@ -152,6 +152,13 @@ public interface FulltextIndexConstants
String EVALUATE_PATH_RESTRICTION = "evaluatePathRestrictions";
/**
+ * Name of the index definition property that specifies a regular expression for property values. If this property is
+ * present in the index definition, only property values in which the regular expression finds a match are added to
+ * the index.
+ */
+ String PROP_VALUE_REGEX = "valueRegex";
+
+ /**
* Experimental config to restrict which property type gets indexed at
* property definition level. Mostly index rule level #INCLUDE_PROPERTY_TYPES
* should be sufficient
@@ -206,7 +213,7 @@ public interface FulltextIndexConstants
String TIKA_MAX_EXTRACT_LENGTH = "maxExtractLength";
/**
- * Config node under tika which defines mime type mappings
+ * Config node under tika which defines mime type mappings
*/
String TIKA_MIME_TYPES = "mimeTypes";
@@ -368,5 +375,5 @@ public interface FulltextIndexConstants
* index is used for queries; otherwise, it is not used (returns infinite
* cost). The value is: nodes, the path. For properties, the path of the node, then '@' property.
*/
- String USE_IF_EXISTS = "useIfExists";
+ String USE_IF_EXISTS = "useIfExists";
}
Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java?rev=1874786&r1=1874785&r2=1874786&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java Wed Mar 4 13:10:12 2020
@@ -261,6 +261,11 @@ public class IndexDefinition implements
private final boolean testMode;
+ /**
+ * See {@link FulltextIndexConstants#PROP_VALUE_REGEX}
+ */
+ private final Pattern propertyRegex;
+
public boolean isTestMode() {
return testMode;
}
@@ -371,7 +376,11 @@ public class IndexDefinition implements
this.fullTextEnabled = hasFulltextEnabledIndexRule(definedIndexRules);
this.evaluatePathRestrictions = getOptionalValue(defn, EVALUATE_PATH_RESTRICTION, false);
-
+ if (defn.hasProperty(PROP_VALUE_REGEX)) {
+ this.propertyRegex = Pattern.compile(getOptionalValue(defn, PROP_VALUE_REGEX, ""));
+ } else {
+ this.propertyRegex = null;
+ }
String functionName = getOptionalValue(defn, FulltextIndexConstants.FUNC_NAME, null);
if (fullTextEnabled && functionName == null) {
functionName = getDefaultFunctionName();
@@ -844,6 +853,10 @@ public class IndexDefinition implements
return false;
}
+ public Pattern getPropertyRegex() {
+ return propertyRegex;
+ }
+
public boolean isSuggestEnabled() {
return suggestEnabled;
}
Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java?rev=1874786&r1=1874785&r2=1874786&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java Wed Mar 4 13:10:12 2020
@@ -24,6 +24,7 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.regex.Pattern;
import javax.jcr.PropertyType;
@@ -187,7 +188,11 @@ public abstract class FulltextDocumentMa
}
if (indexingRule.isFulltextEnabled()) {
- indexFulltextValue(document, name);
+ Pattern propertyRegex = definition.getPropertyRegex();
+ boolean shouldAdd = propertyRegex == null || propertyRegex.matcher(name).find();
+ if (shouldAdd) {
+ indexFulltextValue(document, name);
+ }
}
if (definition.evaluatePathRestrictions()){
@@ -242,8 +247,10 @@ public abstract class FulltextDocumentMa
if (pd.fulltextEnabled() && includeTypeForFullText) {
for (String value : property.getValue(Type.STRINGS)) {
-
logLargeStringProperties(property.getName(), value);
+ if (definition.getPropertyRegex() != null && !definition.getPropertyRegex().matcher(value).find()) {
+ continue;
+ }
if (!includePropertyValue(value, pd)){
continue;
}