You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by to...@apache.org on 2019/03/12 13:09:46 UTC

svn commit: r1855317 - in /jackrabbit/oak/branches/1.10: ./ oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/ oak-search/src/main/java/org/apache/ja...

Author: tommaso
Date: Tue Mar 12 13:09:46 2019
New Revision: 1855317

URL: http://svn.apache.org/viewvc?rev=1855317&view=rev
Log:
OAK-8118 - index selected properties to enhance fv simsearch results (branch 1.10)

Modified:
    jackrabbit/oak/branches/1.10/   (props changed)
    jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
    jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java
    jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
    jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
    jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java
    jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java

Propchange: jackrabbit/oak/branches/1.10/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Mar 12 13:09:46 2019
@@ -1,3 +1,3 @@
 /jackrabbit/oak/branches/1.0:1665962
-/jackrabbit/oak/trunk:1850874,1850882,1851236,1851253,1851451,1851533-1851535,1851619,1852052,1852084,1852120,1852451,1852492-1852493,1852528,1852582,1852584,1852601,1852920,1853141,1853229,1853393,1853429,1853433,1853441,1853866,1853868,1853870,1853893,1853969,1853997,1854034,1854044,1854058,1854113,1854373,1854377,1854380,1854385,1854401,1854403,1854455,1854461-1854462,1854466,1854468,1854515,1854533,1854701,1854827,1854848,1854859,1854930,1855221
+/jackrabbit/oak/trunk:1850874,1850882,1851236,1851253,1851451,1851533-1851535,1851619,1852052,1852084,1852120,1852451,1852492-1852493,1852528,1852582,1852584,1852601,1852920,1853141,1853229,1853393,1853429,1853433,1853441,1853866,1853868,1853870,1853893,1853969,1853997,1854034,1854044,1854058,1854113,1854373,1854377,1854380,1854385,1854401,1854403,1854455,1854461-1854462,1854466,1854468,1854515,1854533,1854701,1854827,1854848,1854859,1854930,1854990,1855221
 /jackrabbit/trunk:1345480

Modified: jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java (original)
+++ jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java Tue Mar 12 13:09:46 2019
@@ -35,14 +35,7 @@ import org.apache.jackrabbit.oak.plugins
 import org.apache.jackrabbit.oak.plugins.index.search.spi.binary.FulltextBinaryTextExtractor;
 import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextDocumentMaker;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.DoubleDocValuesField;
-import org.apache.lucene.document.DoubleField;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.LongField;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.SortedDocValuesField;
-import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.*;
 import org.apache.lucene.facet.FacetsConfig;
 import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
 import org.apache.lucene.index.IndexableField;
@@ -316,6 +309,12 @@ public class LuceneDocumentMaker extends
     }
 
     @Override
+    protected boolean indexSimilarityTag(Document doc, PropertyState property) {
+        doc.add(new TextField(FieldNames.SIMILARITY_TAGS, property.getValue(Type.STRING), Field.Store.YES));
+        return true;
+    }
+
+    @Override
     protected void indexSimilarityStrings(Document doc, PropertyDefinition pd, String value) throws IOException {
         for (Field f : FieldFactory.newSimilarityFields(pd.name, value)) {
             doc.add(f);

Modified: jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java (original)
+++ jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java Tue Mar 12 13:09:46 2019
@@ -84,7 +84,7 @@ public class SimSearchUtils {
             }
         return doubles;
     }
-        
+
     private static Collection<BytesRef> getTokens(Analyzer analyzer, String field, String sampleTextString) throws IOException {
         Collection<BytesRef> tokens = new LinkedList<>();
         TokenStream ts = analyzer.tokenStream(field, sampleTextString);
@@ -156,9 +156,18 @@ public class SimSearchUtils {
                             log.trace("generating sim query on field {} and text {}", similarityFieldName, fvString);
                             Query simQuery = SimSearchUtils.getSimQuery(analyzer, similarityFieldName, fvString);
                             booleanQuery.add(new BooleanClause(simQuery, SHOULD));
+                            String[] binaryTags = doc.getValues(FieldNames.SIMILARITY_TAGS);
+                            if (binaryTags != null && binaryTags.length > 0) {
+                                BooleanQuery tagQuery = new BooleanQuery();
+                                for (String brt : binaryTags) {
+                                    tagQuery.add(new BooleanClause(new TermQuery(new Term(FieldNames.SIMILARITY_TAGS, brt)), SHOULD));
+                                }
+                                tagQuery.setBoost(0.5f);
+                                booleanQuery.add(tagQuery, SHOULD);
+                            }
                             log.trace("similarity query generated for {}", pd.name);
                         } else {
-                            log.warn("could not create query for similarity field {}", fvString);
+                            log.warn("could not create query for similarity field {}", similarityFieldName);
                         }
                     }
                 }

Modified: jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java (original)
+++ jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java Tue Mar 12 13:09:46 2019
@@ -65,6 +65,11 @@ public final class FieldNames {
     private static final String SIMILARITY_BINARY_PREFIX = "simbin:";
 
     /**
+     * Name of the field that contains the tags used to boost similarity search results.
+     */
+    public static final String SIMILARITY_TAGS = "simtags";
+
+    /**
      * Name of the field that contains the suggest index.
      */
     public static final String SUGGEST = ":suggest";

Modified: jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java (original)
+++ jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java Tue Mar 12 13:09:46 2019
@@ -260,6 +260,11 @@ public interface FulltextIndexConstants
     String PROP_SIMILARITY_RERANK = "similarityRerank";
 
     /**
+     * Property definition config indicating whether property values should be indexed as tags to boost similarity search results
+     */
+    String PROP_SIMILARITY_TAGS = "similarityTags";
+
+    /**
      * Property definition config indicating that null check support should be
      * enabled for this property
      */

Modified: jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java (original)
+++ jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java Tue Mar 12 13:09:46 2019
@@ -128,6 +128,7 @@ public class PropertyDefinition {
     public final boolean useInSimilarity;
 
     public final boolean similarityRerank;
+    public final boolean similarityTags;
 
     public PropertyDefinition(IndexingRule idxDefn, String nodeName, NodeState defn) {
         this.isRegexp = getOptionalValue(defn, PROP_IS_REGEX, false);
@@ -159,6 +160,7 @@ public class PropertyDefinition {
         this.useInSpellcheck = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_USE_IN_SPELLCHECK, false);
         this.useInSimilarity = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_USE_IN_SIMILARITY, false);
         this.similarityRerank = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_SIMILARITY_RERANK, true);
+        this.similarityTags = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_SIMILARITY_TAGS, false);
         this.nullCheckEnabled = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_NULL_CHECK_ENABLED, false);
         this.notNullCheckEnabled = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_NOT_NULL_CHECK_ENABLED, false);
         this.excludeFromAggregate = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_EXCLUDE_FROM_AGGREGATE, false);

Modified: jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java (original)
+++ jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java Tue Mar 12 13:09:46 2019
@@ -265,12 +265,17 @@ public abstract class FulltextDocumentMa
             if (pd.facet && isFacetingEnabled()) {
                 dirty |= indexFacets(doc, property, pname, pd);
             }
+            if (pd.similarityTags) {
+                dirty |= indexSimilarityTag(doc, property);
+            }
 
         }
 
         return dirty;
     }
 
+    protected abstract boolean indexSimilarityTag(D doc, PropertyState property);
+
     protected abstract void indexSimilarityBinaries(D doc, PropertyDefinition pd, Blob blob) throws IOException;
 
     protected abstract void indexSimilarityStrings(D doc, PropertyDefinition pd, String value) throws IOException;