You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by to...@apache.org on 2019/03/12 13:09:46 UTC
svn commit: r1855317 - in /jackrabbit/oak/branches/1.10: ./
oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/
oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/
oak-search/src/main/java/org/apache/ja...
Author: tommaso
Date: Tue Mar 12 13:09:46 2019
New Revision: 1855317
URL: http://svn.apache.org/viewvc?rev=1855317&view=rev
Log:
OAK-8118 - index selected properties to enhance fv simsearch results (branch 1.10)
Modified:
jackrabbit/oak/branches/1.10/ (props changed)
jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
Propchange: jackrabbit/oak/branches/1.10/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Mar 12 13:09:46 2019
@@ -1,3 +1,3 @@
/jackrabbit/oak/branches/1.0:1665962
-/jackrabbit/oak/trunk:1850874,1850882,1851236,1851253,1851451,1851533-1851535,1851619,1852052,1852084,1852120,1852451,1852492-1852493,1852528,1852582,1852584,1852601,1852920,1853141,1853229,1853393,1853429,1853433,1853441,1853866,1853868,1853870,1853893,1853969,1853997,1854034,1854044,1854058,1854113,1854373,1854377,1854380,1854385,1854401,1854403,1854455,1854461-1854462,1854466,1854468,1854515,1854533,1854701,1854827,1854848,1854859,1854930,1855221
+/jackrabbit/oak/trunk:1850874,1850882,1851236,1851253,1851451,1851533-1851535,1851619,1852052,1852084,1852120,1852451,1852492-1852493,1852528,1852582,1852584,1852601,1852920,1853141,1853229,1853393,1853429,1853433,1853441,1853866,1853868,1853870,1853893,1853969,1853997,1854034,1854044,1854058,1854113,1854373,1854377,1854380,1854385,1854401,1854403,1854455,1854461-1854462,1854466,1854468,1854515,1854533,1854701,1854827,1854848,1854859,1854930,1854990,1855221
/jackrabbit/trunk:1345480
Modified: jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java (original)
+++ jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java Tue Mar 12 13:09:46 2019
@@ -35,14 +35,7 @@ import org.apache.jackrabbit.oak.plugins
import org.apache.jackrabbit.oak.plugins.index.search.spi.binary.FulltextBinaryTextExtractor;
import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextDocumentMaker;
import org.apache.jackrabbit.oak.spi.state.NodeState;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.DoubleDocValuesField;
-import org.apache.lucene.document.DoubleField;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.LongField;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.SortedDocValuesField;
-import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.*;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.index.IndexableField;
@@ -316,6 +309,12 @@ public class LuceneDocumentMaker extends
}
@Override
+ protected boolean indexSimilarityTag(Document doc, PropertyState property) {
+ doc.add(new TextField(FieldNames.SIMILARITY_TAGS, property.getValue(Type.STRING), Field.Store.YES));
+ return true;
+ }
+
+ @Override
protected void indexSimilarityStrings(Document doc, PropertyDefinition pd, String value) throws IOException {
for (Field f : FieldFactory.newSimilarityFields(pd.name, value)) {
doc.add(f);
Modified: jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java (original)
+++ jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java Tue Mar 12 13:09:46 2019
@@ -84,7 +84,7 @@ public class SimSearchUtils {
}
return doubles;
}
-
+
private static Collection<BytesRef> getTokens(Analyzer analyzer, String field, String sampleTextString) throws IOException {
Collection<BytesRef> tokens = new LinkedList<>();
TokenStream ts = analyzer.tokenStream(field, sampleTextString);
@@ -156,9 +156,18 @@ public class SimSearchUtils {
log.trace("generating sim query on field {} and text {}", similarityFieldName, fvString);
Query simQuery = SimSearchUtils.getSimQuery(analyzer, similarityFieldName, fvString);
booleanQuery.add(new BooleanClause(simQuery, SHOULD));
+ String[] binaryTags = doc.getValues(FieldNames.SIMILARITY_TAGS);
+ if (binaryTags != null && binaryTags.length > 0) {
+ BooleanQuery tagQuery = new BooleanQuery();
+ for (String brt : binaryTags) {
+ tagQuery.add(new BooleanClause(new TermQuery(new Term(FieldNames.SIMILARITY_TAGS, brt)), SHOULD));
+ }
+ tagQuery.setBoost(0.5f);
+ booleanQuery.add(tagQuery, SHOULD);
+ }
log.trace("similarity query generated for {}", pd.name);
} else {
- log.warn("could not create query for similarity field {}", fvString);
+ log.warn("could not create query for similarity field {}", similarityFieldName);
}
}
}
Modified: jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java (original)
+++ jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java Tue Mar 12 13:09:46 2019
@@ -65,6 +65,11 @@ public final class FieldNames {
private static final String SIMILARITY_BINARY_PREFIX = "simbin:";
/**
+ * Name of the field that contains the tags indexed to boost similarity search results.
+ */
+ public static final String SIMILARITY_TAGS = "simtags";
+
+ /**
* Name of the field that contains the suggest index.
*/
public static final String SUGGEST = ":suggest";
Modified: jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java (original)
+++ jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java Tue Mar 12 13:09:46 2019
@@ -260,6 +260,11 @@ public interface FulltextIndexConstants
String PROP_SIMILARITY_RERANK = "similarityRerank";
/**
+ * whether property values should be indexed as tags to boost similarity search results
+ */
+ String PROP_SIMILARITY_TAGS = "similarityTags";
+
+ /**
* Property definition config indicating that null check support should be
* enabled for this property
*/
Modified: jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java (original)
+++ jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java Tue Mar 12 13:09:46 2019
@@ -128,6 +128,7 @@ public class PropertyDefinition {
public final boolean useInSimilarity;
public final boolean similarityRerank;
+ public final boolean similarityTags;
public PropertyDefinition(IndexingRule idxDefn, String nodeName, NodeState defn) {
this.isRegexp = getOptionalValue(defn, PROP_IS_REGEX, false);
@@ -159,6 +160,7 @@ public class PropertyDefinition {
this.useInSpellcheck = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_USE_IN_SPELLCHECK, false);
this.useInSimilarity = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_USE_IN_SIMILARITY, false);
this.similarityRerank = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_SIMILARITY_RERANK, true);
+ this.similarityTags = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_SIMILARITY_TAGS, false);
this.nullCheckEnabled = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_NULL_CHECK_ENABLED, false);
this.notNullCheckEnabled = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_NOT_NULL_CHECK_ENABLED, false);
this.excludeFromAggregate = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_EXCLUDE_FROM_AGGREGATE, false);
Modified: jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java (original)
+++ jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java Tue Mar 12 13:09:46 2019
@@ -265,12 +265,17 @@ public abstract class FulltextDocumentMa
if (pd.facet && isFacetingEnabled()) {
dirty |= indexFacets(doc, property, pname, pd);
}
+ if (pd.similarityTags) {
+ dirty |= indexSimilarityTag(doc, property);
+ }
}
return dirty;
}
+ protected abstract boolean indexSimilarityTag(D doc, PropertyState property);
+
protected abstract void indexSimilarityBinaries(D doc, PropertyDefinition pd, Blob blob) throws IOException;
protected abstract void indexSimilarityStrings(D doc, PropertyDefinition pd, String value) throws IOException;