You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ma...@apache.org on 2018/09/21 14:15:15 UTC
svn commit: r1841593 - in /jackrabbit/oak/branches/1.8: ./
oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/
oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/
oak-lucene/src/main/java/org/apache/jackra...
Author: mattryan
Date: Fri Sep 21 14:15:14 2018
New Revision: 1841593
URL: http://svn.apache.org/viewvc?rev=1841593&view=rev
Log:
OAK-7575 - Search over similar feature vectors (backported to 1.8)
Added:
jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/
- copied from r1834326, jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/
jackrabbit/oak/branches/1.8/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/
- copied from r1834326, jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/
jackrabbit/oak/branches/1.8/oak-lucene/src/test/resources/org/apache/jackrabbit/oak/query/fvs.csv
- copied unchanged from r1834326, jackrabbit/oak/trunk/oak-lucene/src/test/resources/org/apache/jackrabbit/oak/query/fvs.csv
Modified:
jackrabbit/oak/branches/1.8/ (props changed)
jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldNames.java
jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/PropertyDefinition.java
jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/IndexDefinitionBuilder.java
jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/IndexWriterUtils.java
jackrabbit/oak/branches/1.8/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
Propchange: jackrabbit/oak/branches/1.8/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Sep 21 14:15:14 2018
@@ -1,3 +1,3 @@
/jackrabbit/oak/branches/1.0:1665962
-/jackrabbit/oak/trunk:1820660-1820661,1820729,1820734,1820859,1820861,1820878,1820888,1820947,1821027,1821130,1821140-1821141,1821178,1821237,1821240,1821249,1821258,1821325,1821358,1821361-1821362,1821370,1821375,1821393,1821477,1821487,1821516,1821617,1821663,1821665,1821668,1821681,1821847,1821975-1821983,1822121,1822201,1822207,1822527,1822723,1822808,1822850,1822934,1823135,1823163,1823169,1823172,1823655,1823669,1824196,1824198,1824253,1824255,1824896,1824962,1825065,1825362,1825381,1825442,1825448,1825466,1825470-1825471,1825475,1825523,1825525,1825561,1825619-1825621,1825651,1825654,1825992,1826079,1826090,1826096,1826216,1826237,1826338,1826516,1826532,1826551,1826560,1826638,1826640,1826730,1826932,1826957,1827423,1827472,1827486,1827977,1828349,1828439,1828502,1828529,1828948,1829527,1829534,1829546,1829569,1829587,1829665,1829854,1829864,1829978,1829985,1829987,1829998,1830019,1830048,1830160,1830171,1830197,1830209,1830239,1830347,1830748,1830911,1830923,1831157-1831158
,1831163,1831190,1831374,1831560,1831689,1832258,1832376,1832379,1832535,1833308,1833347,1833833,1834112,1834117,1834287,1834291,1834302,1834336,1834428,1834610,1834648-1834649,1834681,1834823,1834857-1834858,1835060,1835518,1835521,1835635,1835642,1835780,1835819,1836121,1836487,1836493,1837057,1837274,1837296,1837326,1837475,1837503,1837547,1837569,1837600,1837657,1837718,1837998,1838076,1838637,1839549,1839570,1839637,1839746,1840024,1840455,1840574
+/jackrabbit/oak/trunk:1820660-1820661,1820729,1820734,1820859,1820861,1820878,1820888,1820947,1821027,1821130,1821140-1821141,1821178,1821237,1821240,1821249,1821258,1821325,1821358,1821361-1821362,1821370,1821375,1821393,1821477,1821487,1821516,1821617,1821663,1821665,1821668,1821681,1821847,1821975-1821983,1822121,1822201,1822207,1822527,1822723,1822808,1822850,1822934,1823135,1823163,1823169,1823172,1823655,1823669,1824196,1824198,1824253,1824255,1824896,1824962,1825065,1825362,1825381,1825442,1825448,1825466,1825470-1825471,1825475,1825523,1825525,1825561,1825619-1825621,1825651,1825654,1825992,1826079,1826090,1826096,1826216,1826237,1826338,1826516,1826532,1826551,1826560,1826638,1826640,1826730,1826932,1826957,1827423,1827472,1827486,1827977,1828349,1828439,1828502,1828529,1828948,1829527,1829534,1829546,1829569,1829587,1829665,1829854,1829864,1829978,1829985,1829987,1829998,1830019,1830048,1830160,1830171,1830197,1830209,1830239,1830347,1830748,1830911,1830923,1831157-1831158
,1831163,1831190,1831374,1831560,1831689,1832258,1832376,1832379,1832535,1833308,1833347,1833833,1834112,1834117,1834287,1834291,1834302,1834326,1834336,1834428,1834610,1834648-1834649,1834681,1834823,1834857-1834858,1835060,1835518,1835521,1835635,1835642,1835780,1835819,1836121,1836487,1836493,1837057,1837274,1837296,1837326,1837475,1837503,1837547,1837569,1837600,1837657,1837718,1837998,1838076,1838637,1839549,1839570,1839637,1839746,1840024,1840455,1840574
/jackrabbit/trunk:1345480
Modified: jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java?rev=1841593&r1=1841592&r2=1841593&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java (original)
+++ jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java Fri Sep 21 14:15:14 2018
@@ -16,15 +16,22 @@
*/
package org.apache.jackrabbit.oak.plugins.index.lucene;
+import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import com.google.common.primitives.Ints;
+import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.plugins.index.lucene.binary.BlobByteSource;
+import org.apache.jackrabbit.oak.plugins.index.lucene.util.fv.SimSearchUtils;
import org.apache.jackrabbit.util.ISO8601;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
@@ -103,6 +110,34 @@ public final class FieldFactory {
return new StringField(name, value, NO);
}
+ /**
+ * Creates the Lucene fields used for similarity search from a binary property value.
+ * The blob is read into a byte array, which is then rendered as a string via
+ * {@link SimSearchUtils#toDoubleString(byte[])} and indexed as a single field.
+ *
+ * @param name the property name; the field name is derived from it
+ * @param value the blob holding the bytes to index
+ * @return a mutable collection containing the single similarity field
+ * @throws IOException declared because reading the blob content may fail
+ */
+ static Collection<Field> newSimilarityFields(String name, Blob value) throws IOException {
+ Collection<Field> fields = new ArrayList<>(1);
+ byte[] bytes = new BlobByteSource(value).read();
+ fields.add(newSimilarityField(name, bytes));
+ return fields;
+ }
+
+ /**
+ * Creates the Lucene fields used for similarity search from a string property value.
+ * The string is indexed as is, in a single field whose name is derived from the
+ * property name.
+ *
+ * @param name the property name; the field name is derived from it
+ * @param value the string value to index
+ * @return a mutable collection containing the single similarity field
+ */
+ static Collection<Field> newSimilarityFields(String name, String value) {
+ Collection<Field> fields = new ArrayList<>(1);
+ fields.add(newSimilarityField(name, value));
+ return fields;
+ }
+
+ /**
+ * Builds the similarity field from raw bytes by converting them to a string with
+ * {@link SimSearchUtils#toDoubleString(byte[])} and delegating to the string variant.
+ */
+ private static Field newSimilarityField(String name, byte[] bytes) {
+ return newSimilarityField(name, SimSearchUtils.toDoubleString(bytes));
+ }
+
+ /**
+ * Builds a stored {@link TextField} named after
+ * {@link FieldNames#createSimilarityFieldName(String)} that holds the given value.
+ */
+ private static Field newSimilarityField(String name, String value) {
+ return new TextField(FieldNames.createSimilarityFieldName(name), value, Field.Store.YES);
+ }
+
+ /**
+ * Builds a {@link StoredField} named after
+ * {@link FieldNames#createBinSimilarityFieldName(String)} that holds the raw bytes.
+ * NOTE(review): no active caller is visible in this change - confirm whether it is still needed.
+ */
+ private static StoredField newBinarySimilarityField(String name, byte[] bytes) {
+ return new StoredField(FieldNames.createBinSimilarityFieldName(name), bytes);
+ }
+
public static Field newFulltextField(String value) {
return newFulltextField(value, false);
}
Modified: jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldNames.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldNames.java?rev=1841593&r1=1841592&r2=1841593&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldNames.java (original)
+++ jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldNames.java Fri Sep 21 14:15:14 2018
@@ -70,6 +70,16 @@ public final class FieldNames {
public static final String ANALYZED_FIELD_PREFIX = "full:";
/**
+ * Prefix for all field names that contain the similarity search indexed tokens.
+ */
+ private static final String SIMILARITY_PREFIX = "sim:";
+
+ /**
+ * Prefix for all field names that contain the similarity search binary values.
+ */
+ private static final String SIMILARITY_BINARY_PREFIX = "simbin:";
+
+ /**
* Prefix used for storing fulltext of relative node
*/
public static final String FULLTEXT_RELATIVE_NODE = "fullnode:";
@@ -138,4 +148,12 @@ public final class FieldNames {
&& !field.startsWith(":")
&& !field.endsWith("_facet");
}
+
+ /**
+ * Returns the name of the field holding the binary similarity value of the given
+ * property, i.e. the property name prefixed with {@code simbin:}.
+ */
+ public static String createBinSimilarityFieldName(String name) {
+ return SIMILARITY_BINARY_PREFIX + name;
+ }
+
+ /**
+ * Returns the name of the field holding the similarity search tokens of the given
+ * property, i.e. the property name prefixed with {@code sim:}.
+ */
+ public static String createSimilarityFieldName(String name) {
+ return SIMILARITY_PREFIX + name;
+ }
}
Modified: jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java?rev=1841593&r1=1841592&r2=1841593&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java (original)
+++ jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java Fri Sep 21 14:15:14 2018
@@ -911,6 +911,7 @@ public final class IndexDefinition imple
private final List<PropertyDefinition> notNullCheckEnabledProperties;
private final List<PropertyDefinition> nodeScopeAnalyzedProps;
private final List<PropertyDefinition> syncProps;
+ private final List<PropertyDefinition> similarityProperties;
private final boolean indexesAllNodesOfMatchingType;
private final boolean nodeNameIndexed;
@@ -925,6 +926,7 @@ public final class IndexDefinition imple
final Aggregate propAggregate;
+
IndexingRule(String nodeTypeName, NodeState config) {
this.nodeTypeName = nodeTypeName;
this.baseNodeType = nodeTypeName;
@@ -938,9 +940,10 @@ public final class IndexDefinition imple
List<PropertyDefinition> existentProperties = newArrayList();
List<PropertyDefinition> nodeScopeAnalyzedProps = newArrayList();
List<PropertyDefinition> syncProps = newArrayList();
+ List<PropertyDefinition> similarityProperties = newArrayList();
List<Aggregate.Include> propIncludes = newArrayList();
this.propConfigs = collectPropConfigs(config, namePatterns, propIncludes, nonExistentProperties,
- existentProperties, nodeScopeAnalyzedProps, functionRestrictions, syncProps);
+ existentProperties, nodeScopeAnalyzedProps, functionRestrictions, syncProps, similarityProperties);
this.propAggregate = new Aggregate(nodeTypeName, propIncludes);
this.aggregate = combine(propAggregate, nodeTypeName);
@@ -949,6 +952,7 @@ public final class IndexDefinition imple
this.nullCheckEnabledProperties = ImmutableList.copyOf(nonExistentProperties);
this.functionRestrictions = ImmutableList.copyOf(functionRestrictions);
this.notNullCheckEnabledProperties = ImmutableList.copyOf(existentProperties);
+ this.similarityProperties = ImmutableList.copyOf(similarityProperties);
this.fulltextEnabled = aggregate.hasNodeAggregates() || hasAnyFullTextEnabledProperty();
this.nodeFullTextIndexed = aggregate.hasNodeAggregates() || anyNodeScopeIndexedProperty();
this.propertyIndexEnabled = hasAnyPropertyIndexConfigured();
@@ -985,6 +989,7 @@ public final class IndexDefinition imple
this.indexesAllNodesOfMatchingType = areAlMatchingNodeByTypeIndexed();
this.nodeNameIndexed = original.nodeNameIndexed;
this.syncProps = original.syncProps;
+ this.similarityProperties = original.similarityProperties;
}
/**
@@ -1032,6 +1037,10 @@ public final class IndexDefinition imple
return nodeScopeAnalyzedProps;
}
+ /**
+ * Returns the property definitions of this rule that have
+ * {@code useInSimilarity} enabled.
+ */
+ public List<PropertyDefinition> getSimilarityProperties() {
+ return similarityProperties;
+ }
+
@Override
public String toString() {
String str = "IndexRule: "+ nodeTypeName;
@@ -1153,7 +1162,8 @@ public final class IndexDefinition imple
List<PropertyDefinition> existentProperties,
List<PropertyDefinition> nodeScopeAnalyzedProps,
List<PropertyDefinition> functionRestrictions,
- List<PropertyDefinition> syncProps) {
+ List<PropertyDefinition> syncProps,
+ List<PropertyDefinition> similarityProperties) {
Map<String, PropertyDefinition> propDefns = newHashMap();
NodeState propNode = config.getChildNode(LuceneIndexConstants.PROP_NODE);
@@ -1232,6 +1242,9 @@ public final class IndexDefinition imple
if (pd.sync) {
syncProps.add(pd);
}
+ if (pd.useInSimilarity) {
+ similarityProperties.add(pd);
+ }
}
}
ensureNodeTypeIndexingIsConsistent(propDefns, syncProps);
Modified: jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java?rev=1841593&r1=1841592&r2=1841593&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java (original)
+++ jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java Fri Sep 21 14:15:14 2018
@@ -62,6 +62,7 @@ import static org.apache.jackrabbit.oak.
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newFulltextField;
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPathField;
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPropertyField;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newSimilarityFields;
import static org.apache.jackrabbit.oak.plugins.index.lucene.util.ConfigUtil.getPrimaryTypeName;
public class LuceneDocumentMaker {
@@ -253,7 +254,15 @@ public class LuceneDocumentMaker {
boolean includeTypeForFullText = indexingRule.includePropertyType(property.getType().tag());
boolean dirty = false;
- if (Type.BINARY.tag() == property.getType().tag()
+ if (Type.BINARY.tag() == property.getType().tag() && pd.useInSimilarity) {
+ try {
+ log.trace("indexing similarity binaries for {}", pd.name);
+ fields.addAll(newSimilarityFields(pd.name, property.getValue(Type.BINARY)));
+ dirty = true;
+ } catch (Exception e) {
+ // best effort: a failure here must not abort indexing, but pass the exception
+ // as the last argument so the cause and stack trace are not lost
+ log.error("could not index similarity field for property {} and definition {}", property, pd, e);
+ }
+ } else if (Type.BINARY.tag() == property.getType().tag()
&& includeTypeForFullText) {
fields.addAll(newBinary(property, state, null, path + "@" + pname));
dirty = true;
@@ -285,10 +294,17 @@ public class LuceneDocumentMaker {
if (pd.nodeScopeIndex) {
Field field = newFulltextField(value);
fields.add(field);
+ if (pd.useInSimilarity) {
+ log.trace("indexing similarity strings for {}", pd.name);
+ fields.addAll(newSimilarityFields(pd.name, value)); // fallback for when feature vectors are written in string typed properties
+ }
}
+
+
dirty = true;
}
}
+
if (pd.facet && isFacetingEnabled()) {
dirty |= addFacetFields(fields, property, pname, pd);
}
Modified: jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java?rev=1841593&r1=1841592&r2=1841593&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java (original)
+++ jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java Fri Sep 21 14:15:14 2018
@@ -303,6 +303,11 @@ public interface LuceneIndexConstants {
String PROP_USE_IN_SPELLCHECK = "useInSpellcheck";
/**
+ * Whether to use the values of this property for similarity search
+ */
+ String PROP_USE_IN_SIMILARITY = "useInSimilarity";
+
+ /**
* Property definition config indicating that null check support should be
* enabled for this property
*/
Modified: jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java?rev=1841593&r1=1841592&r2=1841593&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java (original)
+++ jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java Fri Sep 21 14:15:14 2018
@@ -18,10 +18,6 @@
*/
package org.apache.jackrabbit.oak.plugins.index.lucene;
-import javax.annotation.CheckForNull;
-import javax.annotation.Nonnull;
-import javax.annotation.Nullable;
-import javax.jcr.PropertyType;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@@ -34,6 +30,12 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
+import javax.annotation.CheckForNull;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import javax.jcr.PropertyType;
+
+import com.google.common.base.Joiner;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.Iterables;
@@ -49,6 +51,8 @@ import org.apache.jackrabbit.oak.commons
import org.apache.jackrabbit.oak.commons.PerfLogger;
import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
import org.apache.jackrabbit.oak.commons.json.JsopWriter;
+import org.apache.jackrabbit.oak.plugins.index.Cursors;
+import org.apache.jackrabbit.oak.plugins.index.Cursors.PathCursor;
import org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule;
import org.apache.jackrabbit.oak.plugins.index.lucene.IndexPlanner.PlanResult;
import org.apache.jackrabbit.oak.plugins.index.lucene.IndexPlanner.PropertyIndexResult;
@@ -60,16 +64,9 @@ import org.apache.jackrabbit.oak.plugins
import org.apache.jackrabbit.oak.plugins.index.lucene.util.PathStoredFieldVisitor;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.SpellcheckHelper;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper;
+import org.apache.jackrabbit.oak.plugins.index.lucene.util.fv.SimSearchUtils;
import org.apache.jackrabbit.oak.plugins.memory.PropertyValues;
-import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextAnd;
-import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextContains;
-import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression;
-import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextOr;
-import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextTerm;
-import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextVisitor;
import org.apache.jackrabbit.oak.spi.query.Cursor;
-import org.apache.jackrabbit.oak.plugins.index.Cursors;
-import org.apache.jackrabbit.oak.plugins.index.Cursors.PathCursor;
import org.apache.jackrabbit.oak.spi.query.Filter;
import org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction;
import org.apache.jackrabbit.oak.spi.query.IndexRow;
@@ -77,6 +74,12 @@ import org.apache.jackrabbit.oak.spi.que
import org.apache.jackrabbit.oak.spi.query.QueryIndex;
import org.apache.jackrabbit.oak.spi.query.QueryIndex.AdvanceFulltextQueryIndex;
import org.apache.jackrabbit.oak.spi.query.QueryLimits;
+import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextAnd;
+import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextContains;
+import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression;
+import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextOr;
+import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextTerm;
+import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextVisitor;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.spi.state.NodeStateUtils;
import org.apache.lucene.analysis.Analyzer;
@@ -145,7 +148,9 @@ import static org.apache.jackrabbit.oak.
import static org.apache.jackrabbit.oak.spi.query.QueryConstants.JCR_PATH;
import static org.apache.jackrabbit.oak.spi.query.QueryIndex.AdvancedQueryIndex;
import static org.apache.jackrabbit.oak.spi.query.QueryIndex.NativeQueryIndex;
-import static org.apache.lucene.search.BooleanClause.Occur.*;
+import static org.apache.lucene.search.BooleanClause.Occur.MUST;
+import static org.apache.lucene.search.BooleanClause.Occur.MUST_NOT;
+import static org.apache.lucene.search.BooleanClause.Occur.SHOULD;
/**
* Provides a QueryIndex that does lookups against a Lucene-based index
@@ -860,9 +865,20 @@ public class LucenePropertyIndex impleme
if (query.startsWith("mlt?")) {
String mltQueryString = query.replace("mlt?", "");
if (reader != null) {
- Query moreLikeThis = MoreLikeThisHelper.getMoreLikeThis(reader, analyzer, mltQueryString);
- if (moreLikeThis != null) {
- qs.add(moreLikeThis);
+ List<PropertyDefinition> sp = new LinkedList<>();
+ for (IndexingRule r : defn.getDefinedRules()) {
+ sp.addAll(r.getSimilarityProperties());
+ }
+ if (sp.isEmpty()) {
+ Query moreLikeThis = MoreLikeThisHelper.getMoreLikeThis(reader, analyzer, mltQueryString);
+ if (moreLikeThis != null) {
+ qs.add(moreLikeThis);
+ }
+ } else {
+ Query similarityQuery = SimSearchUtils.getSimilarityQuery(sp, reader, mltQueryString);
+ if (similarityQuery != null) {
+ qs.add(similarityQuery);
+ }
}
}
} else if (query.startsWith("spellcheck?")) {
Modified: jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/PropertyDefinition.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/PropertyDefinition.java?rev=1841593&r1=1841592&r2=1841593&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/PropertyDefinition.java (original)
+++ jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/PropertyDefinition.java Fri Sep 21 14:15:14 2018
@@ -58,7 +58,7 @@ public class PropertyDefinition {
* property etc then it should be defined via 'name' property in NodeState.
* In such case NodeState name can be set to anything
*/
- final String name;
+ public final String name;
private final int propertyType;
/**
@@ -123,7 +123,9 @@ public class PropertyDefinition {
public final boolean unique;
- public PropertyDefinition(IndexingRule idxDefn, String nodeName, NodeState defn) {
+ public boolean useInSimilarity;
+
+ public PropertyDefinition(IndexingRule idxDefn, String nodeName, NodeState defn) {
this.isRegexp = getOptionalValue(defn, PROP_IS_REGEX, false);
this.name = getName(defn, nodeName);
this.relative = isRelativeProperty(name);
@@ -151,6 +153,7 @@ public class PropertyDefinition {
this.propertyType = getPropertyType(idxDefn, nodeName, defn);
this.useInSuggest = getOptionalValueIfIndexed(defn, LuceneIndexConstants.PROP_USE_IN_SUGGEST, false);
this.useInSpellcheck = getOptionalValueIfIndexed(defn, LuceneIndexConstants.PROP_USE_IN_SPELLCHECK, false);
+ this.useInSimilarity = getOptionalValueIfIndexed(defn, LuceneIndexConstants.PROP_USE_IN_SIMILARITY, false);
this.nullCheckEnabled = getOptionalValueIfIndexed(defn, LuceneIndexConstants.PROP_NULL_CHECK_ENABLED, false);
this.notNullCheckEnabled = getOptionalValueIfIndexed(defn, LuceneIndexConstants.PROP_NOT_NULL_CHECK_ENABLED, false);
this.excludeFromAggregate = getOptionalValueIfIndexed(defn, LuceneIndexConstants.PROP_EXCLUDE_FROM_AGGREGATE, false);
Modified: jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/IndexDefinitionBuilder.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/IndexDefinitionBuilder.java?rev=1841593&r1=1841592&r2=1841593&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/IndexDefinitionBuilder.java (original)
+++ jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/IndexDefinitionBuilder.java Fri Sep 21 14:15:14 2018
@@ -319,6 +319,11 @@ public final class IndexDefinitionBuilde
return this;
}
+ /**
+ * Sets {@link LuceneIndexConstants#PROP_USE_IN_SIMILARITY} to {@code true} on this
+ * property definition.
+ *
+ * @return this rule, to allow call chaining
+ */
+ public PropertyRule useInSimilarity() {
+ propTree.setProperty(LuceneIndexConstants.PROP_USE_IN_SIMILARITY, true);
+ return this;
+ }
+
public PropertyRule type(String type){
//This would throw an IAE if type is invalid
PropertyType.valueFromName(type);
Modified: jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/IndexWriterUtils.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/IndexWriterUtils.java?rev=1841593&r1=1841592&r2=1841593&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/IndexWriterUtils.java (original)
+++ jackrabbit/oak/branches/1.8/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/writer/IndexWriterUtils.java Fri Sep 21 14:15:14 2018
@@ -20,12 +20,15 @@
package org.apache.jackrabbit.oak.plugins.index.lucene.writer;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants;
+import org.apache.jackrabbit.oak.plugins.index.lucene.PropertyDefinition;
+import org.apache.jackrabbit.oak.plugins.index.lucene.util.fv.LSHAnalyzer;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
@@ -51,6 +54,15 @@ public class IndexWriterUtils {
Analyzer definitionAnalyzer = definition.getAnalyzer();
Map<String, Analyzer> analyzers = new HashMap<String, Analyzer>();
analyzers.put(FieldNames.SPELLCHECK, new ShingleAnalyzerWrapper(LuceneIndexConstants.ANALYZER, 3));
+ for (IndexDefinition.IndexingRule r : definition.getDefinedRules()) {
+ List<PropertyDefinition> similarityProperties = r.getSimilarityProperties();
+ for (PropertyDefinition pd : similarityProperties) {
+ if (pd.useInSimilarity) {
+ analyzers.put(FieldNames.createSimilarityFieldName(pd.name), new LSHAnalyzer());
+ }
+ }
+ }
+
if (!definition.isSuggestAnalyzed()) {
analyzers.put(FieldNames.SUGGEST, SuggestHelper.getAnalyzer());
}
Modified: jackrabbit/oak/branches/1.8/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.8/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java?rev=1841593&r1=1841592&r2=1841593&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.8/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java (original)
+++ jackrabbit/oak/branches/1.8/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java Fri Sep 21 14:15:14 2018
@@ -19,9 +19,6 @@
package org.apache.jackrabbit.oak.plugins.index.lucene;
-import javax.annotation.Nonnull;
-import javax.jcr.PropertyType;
-
import static com.google.common.collect.ImmutableSet.of;
import static com.google.common.collect.Lists.newArrayList;
import static java.util.Arrays.asList;
@@ -72,11 +69,16 @@ import static org.junit.Assert.assertNul
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
+import java.io.ByteArrayInputStream;
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.net.URI;
+import java.nio.charset.Charset;
import java.text.ParseException;
import java.util.Calendar;
+import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
@@ -87,6 +89,9 @@ import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
+import javax.annotation.Nonnull;
+import javax.jcr.PropertyType;
+
import com.google.common.base.Charsets;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.ImmutableList;
@@ -98,6 +103,7 @@ import com.google.common.io.CountingInpu
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.JcrConstants;
+import org.apache.jackrabbit.oak.InitialContent;
import org.apache.jackrabbit.oak.Oak;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.CommitFailedException;
@@ -117,13 +123,13 @@ import org.apache.jackrabbit.oak.plugins
import org.apache.jackrabbit.oak.plugins.index.fulltext.PreExtractedTextProvider;
import org.apache.jackrabbit.oak.plugins.index.lucene.directory.CopyOnReadDirectory;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.IndexDefinitionBuilder;
+import org.apache.jackrabbit.oak.plugins.index.lucene.util.fv.SimSearchUtils;
import org.apache.jackrabbit.oak.plugins.index.nodetype.NodeTypeIndexProvider;
import org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexEditorProvider;
import org.apache.jackrabbit.oak.plugins.memory.ArrayBasedBlob;
import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
import org.apache.jackrabbit.oak.plugins.memory.PropertyStates;
import org.apache.jackrabbit.oak.plugins.nodetype.TypeEditorProvider;
-import org.apache.jackrabbit.oak.InitialContent;
import org.apache.jackrabbit.oak.plugins.nodetype.write.NodeTypeRegistry;
import org.apache.jackrabbit.oak.query.AbstractQueryTest;
import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
@@ -2747,9 +2753,114 @@ public class LucenePropertyIndexTest ext
"lucene:test1(/oak:index/test1)", asList("/d"));
}
- private void assertPlanAndQuery(String query, String planExpectation, List<String> paths){
+ /**
+ * Stores the feature vectors from fvs.csv as binary properties and checks that the
+ * result of a similarity query differs from the previous one as the reference node changes.
+ */
+ @Test
+ public void testRepSimilarWithBinaryFeatureVectors() throws Exception {
+
+ IndexDefinitionBuilder idxb = new IndexDefinitionBuilder().noAsync();
+ idxb.indexRule("nt:base").property("fv").useInSimilarity().nodeScopeIndex().propertyIndex();
+
+ Tree idx = root.getTree("/").getChild("oak:index").addChild("test1");
+ idxb.build(idx);
+ root.commit();
+
+ Tree test = root.getTree("/").addChild("test");
+
+ URI uri = getClass().getResource("/org/apache/jackrabbit/oak/query/fvs.csv").toURI();
+ File file = new File(uri);
+
+ Collection<String> children = new LinkedList<>();
+ // IOUtils.readLines does not close the stream, so close it here
+ try (InputStream in = new FileInputStream(file)) {
+ for (String line : IOUtils.readLines(in, Charset.defaultCharset())) {
+ String[] split = line.split(",");
+ // the first column is the node name, the remaining ones are the vector values
+ List<Double> values = new LinkedList<>();
+ for (int i = 1; i < split.length; i++) {
+ values.add(Double.parseDouble(split[i]));
+ }
+
+ // sanity check: the byte encoding must round trip
+ byte[] bytes = SimSearchUtils.toByteArray(values);
+ List<Double> actual = SimSearchUtils.toDoubles(bytes);
+ assertEquals(values, actual);
+
+ Blob blob = root.createBlob(new ByteArrayInputStream(bytes));
+ String name = split[0];
+ Tree child = test.addChild(name);
+ child.setProperty("fv", blob, Type.BINARY);
+ // remember the path, otherwise the similarity checks below never execute
+ children.add(child.getPath());
+ }
+ }
+ root.commit();
+
+ // check that similarity changes across different feature vectors
+ List<String> baseline = new LinkedList<>();
+ for (String similarPath : children) {
+ String query = "select [jcr:path] from [nt:base] where similar(., '" + similarPath + "')";
+
+ Iterator<String> result = executeQuery(query, "JCR-SQL2").iterator();
+ List<String> current = new LinkedList<>();
+ while (result.hasNext()) {
+ String next = result.next();
+ current.add(next);
+ }
+ assertNotEquals(baseline, current);
+ baseline.clear();
+ baseline.addAll(current);
+ }
+
+ }
+
+ /**
+ * Stores the feature vectors from fvs.csv as string properties and checks that the
+ * result of a similarity query differs from the previous one as the reference node changes.
+ */
+ @Test
+ public void testRepSimilarWithStringFeatureVectors() throws Exception {
+
+ IndexDefinitionBuilder idxb = new IndexDefinitionBuilder().noAsync();
+ idxb.indexRule("nt:base").property("fv").useInSimilarity().nodeScopeIndex().propertyIndex();
+
+ Tree idx = root.getTree("/").getChild("oak:index").addChild("test1");
+ idxb.build(idx);
+ root.commit();
+
+ Tree test = root.getTree("/").addChild("test");
+
+ URI uri = getClass().getResource("/org/apache/jackrabbit/oak/query/fvs.csv").toURI();
+ File file = new File(uri);
+
+ Collection<String> children = new LinkedList<>();
+
+ // IOUtils.readLines does not close the stream, so close it here
+ try (InputStream in = new FileInputStream(file)) {
+ for (String line : IOUtils.readLines(in, Charset.defaultCharset())) {
+ // the first column is the node name, the rest of the line is the vector
+ int i1 = line.indexOf(',');
+ String name = line.substring(0, i1);
+ String value = line.substring(i1 + 1);
+ Tree child = test.addChild(name);
+ child.setProperty("fv", value, Type.STRING);
+ children.add(child.getPath());
+ }
+ }
+ root.commit();
+
+ // check that similarity changes across different feature vectors
+ List<String> baseline = new LinkedList<>();
+ for (String similarPath : children) {
+ String query = "select [jcr:path] from [nt:base] where similar(., '" + similarPath + "')";
+
+ Iterator<String> result = executeQuery(query, "JCR-SQL2").iterator();
+ List<String> current = new LinkedList<>();
+ while (result.hasNext()) {
+ String next = result.next();
+ current.add(next);
+ }
+ assertNotEquals(baseline, current);
+ baseline.clear();
+ baseline.addAll(current);
+ }
+ }
+
+ /**
+ * Convenience overload that delegates to the four-argument variant with
+ * {@code ordered} set to {@code false}.
+ */
+ private void assertPlanAndQuery(String query, String planExpectation, List<String> paths) {
+ assertPlanAndQuery(query, planExpectation, paths, false);
+ }
+
+ /**
+ * Asserts that the explained plan of the query contains {@code planExpectation} and
+ * then checks the query results against {@code paths}. The {@code ordered} flag is
+ * forwarded to {@code assertQuery} - presumably it controls whether result order
+ * must match; verify against the base test class.
+ */
+ private void assertPlanAndQuery(String query, String planExpectation, List<String> paths, boolean ordered) {
assertThat(explain(query), containsString(planExpectation));
- assertQuery(query, paths);
+ assertQuery(query, SQL2, paths, ordered);
}
private static Tree createNodeWithMixinType(Tree t, String nodeName, String typeName){