You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by to...@apache.org on 2019/03/07 16:31:53 UTC
svn commit: r1854990 - in /jackrabbit/oak/trunk:
oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/
oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/
oak-search/src/main/java/org/apache/jackrabbit/oa...
Author: tommaso
Date: Thu Mar 7 16:31:52 2019
New Revision: 1854990
URL: http://svn.apache.org/viewvc?rev=1854990&view=rev
Log:
OAK-8181 - added optional 'tag' clauses for similarity search
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java?rev=1854990&r1=1854989&r2=1854990&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java Thu Mar 7 16:31:52 2019
@@ -35,14 +35,7 @@ import org.apache.jackrabbit.oak.plugins
import org.apache.jackrabbit.oak.plugins.index.search.spi.binary.FulltextBinaryTextExtractor;
import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextDocumentMaker;
import org.apache.jackrabbit.oak.spi.state.NodeState;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.DoubleDocValuesField;
-import org.apache.lucene.document.DoubleField;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.LongField;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.SortedDocValuesField;
-import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.*;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.index.IndexableField;
@@ -316,6 +309,12 @@ public class LuceneDocumentMaker extends
}
@Override
+ protected boolean indexSimilarityTag(Document doc, PropertyState property) {
+ doc.add(new TextField(FieldNames.SIMILARITY_TAGS, property.getValue(Type.STRING), Field.Store.YES));
+ return true;
+ }
+
+ @Override
protected void indexSimilarityStrings(Document doc, PropertyDefinition pd, String value) throws IOException {
for (Field f : FieldFactory.newSimilarityFields(pd.name, value)) {
doc.add(f);
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java?rev=1854990&r1=1854989&r2=1854990&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java Thu Mar 7 16:31:52 2019
@@ -84,7 +84,7 @@ public class SimSearchUtils {
}
return doubles;
}
-
+
private static Collection<BytesRef> getTokens(Analyzer analyzer, String field, String sampleTextString) throws IOException {
Collection<BytesRef> tokens = new LinkedList<>();
TokenStream ts = analyzer.tokenStream(field, sampleTextString);
@@ -156,9 +156,18 @@ public class SimSearchUtils {
log.trace("generating sim query on field {} and text {}", similarityFieldName, fvString);
Query simQuery = SimSearchUtils.getSimQuery(analyzer, similarityFieldName, fvString);
booleanQuery.add(new BooleanClause(simQuery, SHOULD));
+ String[] binaryTags = doc.getValues(FieldNames.SIMILARITY_TAGS);
+ if (binaryTags != null && binaryTags.length > 0) {
+ BooleanQuery tagQuery = new BooleanQuery();
+ for (String brt : binaryTags) {
+ tagQuery.add(new BooleanClause(new TermQuery(new Term(FieldNames.SIMILARITY_TAGS, brt)), SHOULD));
+ }
+ tagQuery.setBoost(0.5f);
+ booleanQuery.add(tagQuery, SHOULD);
+ }
log.trace("similarity query generated for {}", pd.name);
} else {
- log.warn("could not create query for similarity field {}", fvString);
+ log.warn("could not create query for similarity field {}", similarityFieldName);
}
}
}
Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java?rev=1854990&r1=1854989&r2=1854990&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java Thu Mar 7 16:31:52 2019
@@ -65,6 +65,11 @@ public final class FieldNames {
private static final String SIMILARITY_BINARY_PREFIX = "simbin:";
/**
+ * Name of the field that contains the tags used to boost similarity search results.
+ */
+ public static final String SIMILARITY_TAGS = "simtags";
+
+ /**
* Name of the field that contains the suggest index.
*/
public static final String SUGGEST = ":suggest";
Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java?rev=1854990&r1=1854989&r2=1854990&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java Thu Mar 7 16:31:52 2019
@@ -260,6 +260,11 @@ public interface FulltextIndexConstants
String PROP_SIMILARITY_RERANK = "similarityRerank";
/**
+ * Property definition config indicating whether property values should be indexed as tags to boost similarity search results
+ */
+ String PROP_SIMILARITY_TAGS = "similarityTags";
+
+ /**
* Property definition config indicating that null check support should be
* enabled for this property
*/
Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java?rev=1854990&r1=1854989&r2=1854990&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java Thu Mar 7 16:31:52 2019
@@ -128,6 +128,7 @@ public class PropertyDefinition {
public final boolean useInSimilarity;
public final boolean similarityRerank;
+ public final boolean similarityTags;
public PropertyDefinition(IndexingRule idxDefn, String nodeName, NodeState defn) {
this.isRegexp = getOptionalValue(defn, PROP_IS_REGEX, false);
@@ -159,6 +160,7 @@ public class PropertyDefinition {
this.useInSpellcheck = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_USE_IN_SPELLCHECK, false);
this.useInSimilarity = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_USE_IN_SIMILARITY, false);
this.similarityRerank = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_SIMILARITY_RERANK, true);
+ this.similarityTags = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_SIMILARITY_TAGS, false);
this.nullCheckEnabled = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_NULL_CHECK_ENABLED, false);
this.notNullCheckEnabled = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_NOT_NULL_CHECK_ENABLED, false);
this.excludeFromAggregate = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_EXCLUDE_FROM_AGGREGATE, false);
Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java?rev=1854990&r1=1854989&r2=1854990&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java Thu Mar 7 16:31:52 2019
@@ -265,12 +265,17 @@ public abstract class FulltextDocumentMa
if (pd.facet && isFacetingEnabled()) {
dirty |= indexFacets(doc, property, pname, pd);
}
+ if (pd.similarityTags) {
+ dirty |= indexSimilarityTag(doc, property);
+ }
}
return dirty;
}
+ protected abstract boolean indexSimilarityTag(D doc, PropertyState property);
+
protected abstract void indexSimilarityBinaries(D doc, PropertyDefinition pd, Blob blob) throws IOException;
protected abstract void indexSimilarityStrings(D doc, PropertyDefinition pd, String value) throws IOException;