You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by to...@apache.org on 2018/09/19 06:46:43 UTC

svn commit: r1841291 [1/3] - in /jackrabbit/oak/trunk/oak-search/src: main/java/org/apache/jackrabbit/oak/plugins/index/search/ main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/ main/java/org/apache/jackrabbit/oak/plugins/index/searc...

Author: tommaso
Date: Wed Sep 19 06:46:42 2018
New Revision: 1841291

URL: http://svn.apache.org/viewvc?rev=1841291&view=rev
Log:
OAK-3336 - adjusted oak-search SPIs

Added:
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/BlobByteSource.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/TextExtractionStats.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/TikaParserConfig.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/update/
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/update/IndexUpdateListener.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/update/ReaderRefreshPolicy.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/update/RefreshOnReadPolicy.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/update/RefreshOnWritePolicy.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/update/TimedRefreshPolicy.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/util/DataConversionUtil.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/util/IndexDefinitionUtils.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/util/NodeStateCloner.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNamesTest.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/spi/
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractorTest.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/TikaParserConfigTest.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexTest.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/update/
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/update/RecordingRunnable.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/update/RefreshOnReadPolicyTest.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/update/RefreshOnWritePolicyTest.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/update/TimedRefreshPolicyTest.java   (with props)
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/util/NodeStateClonerTest.java   (with props)
Removed:
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexUpdateListener.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/NodeStateCloner.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ReaderRefreshPolicy.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/RefreshOnReadPolicy.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/RefreshOnWritePolicy.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/TimedRefreshPolicy.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/BlobByteSource.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextBinaryTextExtractor.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/TextExtractionStats.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/TikaParserConfig.java
Modified:
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/Aggregate.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/BadIndexTracker.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ExtractedTextCache.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexFormatVersion.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexLookup.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexNode.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexStatistics.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ReindexOperations.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/SizeEstimator.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/TextExtractionStatsMBean.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextIndexEditor.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextIndexEditorContext.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextIndexWriterFactory.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndex.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/IndexNodeManager.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/util/ConfigUtil.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/util/FunctionIndexProcessor.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/util/IndexDefinitionBuilder.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/util/IndexHelper.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/util/NodeStateCopyUtils.java
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/AggregateTest.java
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/ExtractedTextCacheTest.java
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinitionTest.java
    jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/util/NodeStateCopyUtilsTest.java

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/Aggregate.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/Aggregate.java?rev=1841291&r1=1841290&r2=1841291&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/Aggregate.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/Aggregate.java Wed Sep 19 06:46:42 2018
@@ -46,6 +46,15 @@ import static com.google.common.collect.
 import static org.apache.jackrabbit.oak.commons.PathUtils.elements;
 import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath;
 
+/**
+ * Aggregates text from child nodes for fulltext queries.
+ *
+ * Example: let's say node /x is of type 'web page', but the actual content is
+ * stored in child nodes; say /x/section1 contains "Hello" and /x/section2
+ * contains "World". If index aggregation is configured correctly, it will
+ * combine all the text of the child nodes, and index that as /x. When doing a
+ * fulltext search for "Hello World", the index will then return /x.
+ */
 public class Aggregate {
 
     public static final String MATCH_ALL = "*";
@@ -56,7 +65,7 @@ public class Aggregate {
     public static final int RECURSIVE_AGGREGATION_LIMIT_DEFAULT = 5;
     private final String nodeTypeName;
     private final List<? extends Include> includes;
-    final int reAggregationLimit;
+    public final int reAggregationLimit;
     private final List<NodeInclude> relativeNodeIncludes;
     private final boolean nodeAggregates;
 
@@ -64,7 +73,7 @@ public class Aggregate {
        this(nodeTypeName, Collections.<Include>emptyList());
     }
 
-    Aggregate(String nodeTypeName, List<? extends Include> includes) {
+    public Aggregate(String nodeTypeName, List<? extends Include> includes) {
         this(nodeTypeName, includes, RECURSIVE_AGGREGATION_LIMIT_DEFAULT);
     }
 
@@ -137,7 +146,7 @@ public class Aggregate {
     }
 
     private static void collectAggregatesForDirectMatchers(NodeState nodeState, List<Matcher> matchers,
-                                          ResultCollector collector) {
+                                                           ResultCollector collector) {
         Map<String, ChildNodeEntry> children = Maps.newHashMap();
         //Collect potentially matching child nodestates based on matcher name
         for (Matcher m : matchers){
@@ -151,7 +160,7 @@ public class Aggregate {
     }
 
     private static void collectAggregatesForPatternMatchers(NodeState nodeState, List<Matcher> matchers,
-                                          ResultCollector collector) {
+                                                            ResultCollector collector) {
         matchChildren(matchers, collector, nodeState.getChildNodeEntries());
     }
 
@@ -214,14 +223,14 @@ public class Aggregate {
         });
     }
 
-    public static interface AggregateMapper {
+    public interface AggregateMapper {
         @Nullable
         Aggregate getAggregate(String nodeTypeName);
     }
 
     //~-----------------------------------------------------< Includes >
 
-    public static abstract class Include<T> {
+    public static abstract class Include {
         protected final String[] elements;
 
         public Include(String pattern) {
@@ -232,18 +241,15 @@ public class Aggregate {
             String element = elements[depth];
             if (MATCH_ALL.equals(element)) {
                 return true;
-            } else if (element.equals(name)) {
-                return true;
-            }
-            return false;
+            } else return element.equals(name);
         }
 
         public int maxDepth() {
             return elements.length;
         }
 
-        public void collectResults(T rootInclude, String rootIncludePath,
-                                   String nodePath, NodeState nodeState,  ResultCollector results) {
+        public void collectResults(Include rootInclude, String rootIncludePath,
+                                   String nodePath, NodeState nodeState, ResultCollector results) {
             collectResults(nodePath, nodeState, results);
         }
 
@@ -271,9 +277,9 @@ public class Aggregate {
         }
     }
 
-    public static class NodeInclude extends Include<NodeInclude> {
-        final String primaryType;
-        final boolean relativeNode;
+    public static class NodeInclude extends Include {
+        public final String primaryType;
+        public final boolean relativeNode;
         private final String pattern;
         private final AggregateMapper aggMapper;
 
@@ -302,9 +308,13 @@ public class Aggregate {
         }
 
         @Override
-        public void collectResults(NodeInclude rootInclude, String rootIncludePath, String nodePath,
+        public void collectResults(Include include, String rootIncludePath, String nodePath,
                                    NodeState nodeState, ResultCollector results) {
             //For supporting jcr:contains(jcr:content, 'foo')
+            if (!(include instanceof NodeInclude)) {
+                throw new IllegalArgumentException("" + include);
+            }
+            NodeInclude rootInclude = (NodeInclude) include;
             if (rootInclude.relativeNode){
                 results.onResult(new NodeIncludeResult(nodePath, rootIncludePath, nodeState));
             }
@@ -364,7 +374,7 @@ public class Aggregate {
         }
     }
 
-    public static class PropertyInclude extends Include<PropertyInclude> {
+    public static class PropertyInclude extends Include {
         private final PropertyDefinition propertyDefinition;
         private final String propertyName;
         private final Pattern pattern;
@@ -417,7 +427,7 @@ public class Aggregate {
         }
     }
 
-    public static interface ResultCollector {
+    public interface ResultCollector {
         void onResult(NodeIncludeResult result);
 
         void onResult(PropertyIncludeResult result);

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/BadIndexTracker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/BadIndexTracker.java?rev=1841291&r1=1841290&r2=1841291&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/BadIndexTracker.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/BadIndexTracker.java Wed Sep 19 06:46:42 2018
@@ -30,6 +30,16 @@ import com.google.common.collect.Maps;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+/**
+ * Tracks bad (corrupt) indexes.
+ *
+ * An index can be corrupt for reads (an exception was thrown when index was
+ * opened for query), and persistent (an exception was thrown when index is
+ * reopened after an update).
+ *
+ * Indexes marked bad for reads might become good again later, if another
+ * cluster node fixed the corruption (eg. by reindexing).
+ */
 public class BadIndexTracker {
     /**
      * Time interval in millis after which a bad index would be accessed again
@@ -117,15 +127,15 @@ public class BadIndexTracker {
         return badIndexesForRead.keySet();
     }
 
-    BadIndexInfo getInfo(String indexPath){
+    public BadIndexInfo getInfo(String indexPath){
         return badIndexesForRead.get(indexPath);
     }
 
-    Set<String> getBadPersistedIndexPaths() {
+    public Set<String> getBadPersistedIndexPaths() {
         return badPersistedIndexes.keySet();
     }
 
-    BadIndexInfo getPersistedIndexInfo(String indexPath){
+    public BadIndexInfo getPersistedIndexInfo(String indexPath){
         return badPersistedIndexes.get(indexPath);
     }
 
@@ -133,7 +143,7 @@ public class BadIndexTracker {
         return recheckIntervalMillis;
     }
 
-    void setTicker(Ticker ticker) {
+    public void setTicker(Ticker ticker) {
         this.ticker = ticker;
     }
 
@@ -141,8 +151,8 @@ public class BadIndexTracker {
         return !(badIndexesForRead.isEmpty() && badPersistedIndexes.isEmpty());
     }
 
-    class BadIndexInfo {
-        final String path;
+    public class BadIndexInfo {
+        public final String path;
         final int lastIndexerCycleCount = indexerCycleCount;
         private final long createdTime = TimeUnit.NANOSECONDS.toMillis(ticker.read());
         private final boolean persistedIndex;

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ExtractedTextCache.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ExtractedTextCache.java?rev=1841291&r1=1841290&r2=1841291&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ExtractedTextCache.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ExtractedTextCache.java Wed Sep 19 06:46:42 2018
@@ -54,6 +54,10 @@ import org.slf4j.LoggerFactory;
 
 import static org.apache.jackrabbit.oak.commons.PathUtils.concat;
 
+/**
+ * A cache to avoid extracting text of binaries that were already processed (in
+ * a different node that references the same binary).
+ */
 public class ExtractedTextCache {
     private static final boolean CACHE_ONLY_SUCCESS =
             Boolean.getBoolean("oak.extracted.cacheOnlySuccess");
@@ -75,7 +79,10 @@ public class ExtractedTextCache {
     private long totalTextSize;
     private long totalTime;
     private int preFetchedCount;
+
+    // the actual cache. key: content id, value: extracted text
     private final Cache<String, String> cache;
+
     private final ConcurrentHashMap<String, String> timeoutMap;
     private final File indexDir;
     private final CacheStats cacheStats;
@@ -104,7 +111,7 @@ public class ExtractedTextCache {
             cacheStats = null;
         }
         this.alwaysUsePreExtractedCache = alwaysUsePreExtractedCache;
-        this.timeoutMap = new ConcurrentHashMap<String, String>();
+        this.timeoutMap = new ConcurrentHashMap<>();
         this.indexDir = indexDir;
         loadTimeoutMap();
     }
@@ -241,13 +248,13 @@ public class ExtractedTextCache {
         return extractedTextProvider;
     }
 
-    void resetCache(){
+    public void resetCache(){
         if (cache != null){
             cache.invalidateAll();
         }
     }
 
-    boolean isAlwaysUsePreExtractedCache() {
+    public boolean isAlwaysUsePreExtractedCache() {
         return alwaysUsePreExtractedCache;
     }
 
@@ -283,7 +290,7 @@ public class ExtractedTextCache {
         closeExecutorService();
     }
 
-    public void process(String name, Callable<Void> callable) throws InterruptedException, Throwable {
+    public void process(String name, Callable<Void> callable) throws Throwable {
         Callable<Void> callable2 = new Callable<Void>() {
             @Override
             public Void call() throws Exception {
@@ -305,9 +312,7 @@ public class ExtractedTextCache {
                 future.get(extractionTimeoutMillis, TimeUnit.MILLISECONDS);
             }
         } catch (TimeoutException e) {
-            timeoutCount++;
-            throw e;
-        } catch (InterruptedException e) {
+            timeoutCount++; // TODO : use AtomicInteger ? this is a non-atomic operation on a volatile field
             throw e;
         } catch (ExecutionException e) {
             throw e.getCause();
@@ -332,7 +337,7 @@ public class ExtractedTextCache {
         log.debug("ExtractedTextCache createExecutor " + this);
         ThreadPoolExecutor executor = new ThreadPoolExecutor(1, EXTRACTION_MAX_THREADS,
                 60L, TimeUnit.SECONDS,
-                new LinkedBlockingQueue<Runnable>(), new ThreadFactory() {
+            new LinkedBlockingQueue<>(), new ThreadFactory() {
             private final AtomicInteger counter = new AtomicInteger();
             private final Thread.UncaughtExceptionHandler handler = new Thread.UncaughtExceptionHandler() {
                 @Override

Added: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java?rev=1841291&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java (added)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java Wed Sep 19 06:46:42 2018
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.search;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Defines field names that are used internally to store data in the
+ * search index.
+ */
+public final class FieldNames {
+
+
+    /**
+     * Private constructor.
+     */
+    private FieldNames() {
+    }
+
+    /**
+     * Name of the field that contains the {@value} property of the node.
+     */
+    public static final String PATH = ":path";
+
+    /**
+     * Name of the field that contains all the path hierarchy e.g. for /a/b/c
+     * it would contain /a, /a/b, /a/b/c
+     */
+    public static final String ANCESTORS = ":ancestors";
+
+    /**
+     * Name of the field which refers to the depth of path
+     */
+    public static final String PATH_DEPTH = ":depth";
+
+    /**
+     * Name of the field that contains the fulltext index.
+     */
+    public static final String FULLTEXT = ":fulltext";
+
+    /**
+     * Prefix of the fields that contain the similarity search indexed tokens.
+     */
+    private static final String SIMILARITY_PREFIX = "sim:";
+
+    /**
+     * Name of the field that contains the suggest index.
+     */
+    public static final String SUGGEST = ":suggest";
+
+    /**
+     * Name of the field that contains the spellcheck index.
+     */
+    public static final String SPELLCHECK = ":spellcheck";
+
+    /**
+     * Prefix for all field names that are fulltext indexed by property name.
+     */
+    public static final String ANALYZED_FIELD_PREFIX = "full:";
+
+    /**
+     * Prefix used for storing fulltext of relative node
+     */
+    public static final String FULLTEXT_RELATIVE_NODE = "fullnode:";
+
+    /**
+     * Name of the field that contains those property names which are not found
+     * (or were null) for the given NodeState
+     */
+    public static final String NULL_PROPS = ":nullProps";
+
+    /**
+     * Name of the field that contains those property names which exist i.e. are not null
+     * for the given NodeState
+     */
+    public static final String NOT_NULL_PROPS = ":notNullProps";
+
+    /**
+     * Name of the field that contains the node name
+     */
+    public static final String NODE_NAME = ":nodeName";
+    
+    /**
+     * Prefix of the fields that contain function values
+     */
+    public static final String FUNCTION_PREFIX = "function*";
+
+    /**
+     * Used to select only the PATH field from the lucene documents
+     */
+    public static final Set<String> PATH_SELECTOR = new HashSet<>(
+        Collections.singletonList(PATH));
+
+    /**
+     * Encodes the field name such that it can be used for storing DocValue.
+     * This is done so that a field, if used for both sorting and querying, uses
+     * a different name for the docvalue field.
+     *
+     * @param name name to encode
+     * @return encoded field name
+     */
+    public static String createDocValFieldName(String name){
+        return ":dv" + name;
+    }
+
+    public static String createAnalyzedFieldName(String pname) {
+        return ANALYZED_FIELD_PREFIX + pname;
+    }
+
+    public static String createFulltextFieldName(String nodeRelativePath) {
+        if (nodeRelativePath == null){
+            return FULLTEXT;
+        }
+        return FULLTEXT_RELATIVE_NODE + nodeRelativePath;
+    }
+
+    public static String createFacetFieldName(String pname) {
+        return pname + "_facet";
+    }
+
+    /**
+     * @return {@code true} if {@code field} represents a field containing property indexed data
+     */
+    public static boolean isPropertyField(String field) {
+        return !field.startsWith(ANALYZED_FIELD_PREFIX)
+                && !field.startsWith(FULLTEXT_RELATIVE_NODE)
+                && !field.startsWith(":")
+                && !field.endsWith("_facet");
+    }
+
+    public static String createSimilarityFieldName(String name) {
+        return SIMILARITY_PREFIX + name;
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java?rev=1841291&r1=1841290&r2=1841291&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java Wed Sep 19 06:46:42 2018
@@ -16,19 +16,19 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.search;
 
-
+/**
+ * Internal constants used in index definition, and index implementations.
+ */
 public interface FulltextIndexConstants {
 
-  enum IndexingMode {
-        SYNC,
-        NRT,
-        ASYNC;
+    enum IndexingMode {
+        SYNC, NRT, ASYNC;
 
-        public String asyncValueName(){
+        public String asyncValueName() {
             return name().toLowerCase();
         }
 
-        public static IndexingMode from(String indexingMode){
+        public static IndexingMode from(String indexingMode) {
             return valueOf(indexingMode.toUpperCase());
         }
     }
@@ -192,6 +192,7 @@ public interface FulltextIndexConstants
 
     /**
      * Config node which include Tika related configuration
+     * Its value should match {@link FieldNames#NODE_NAME}
      */
     String TIKA = "tika";
 
@@ -248,6 +249,11 @@ public interface FulltextIndexConstants
      */
     String PROP_USE_IN_SPELLCHECK = "useInSpellcheck";
 
+  /**
+   * Whether to use this property's values for similarity
+   */
+  String PROP_USE_IN_SIMILARITY = "useInSimilarity";
+
     /**
      * Property definition config indicating that null check support should be
      * enabled for this property
@@ -290,6 +296,12 @@ public interface FulltextIndexConstants
     String COMPAT_MODE = "compatVersion";
 
     /**
+     * Optional (index definition) property indicating whether facets should be ACL checked.
+     * Default is true
+     */
+    String PROP_SECURE_FACETS = "secure";
+
+    /**
      * Optional (index definition) property indicating max number of facets that will be retrieved
      * in query
      * Default is {@link IndexDefinition#DEFAULT_FACET_COUNT}

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexFormatVersion.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexFormatVersion.java?rev=1841291&r1=1841290&r2=1841291&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexFormatVersion.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexFormatVersion.java Wed Sep 19 06:46:42 2018
@@ -19,6 +19,11 @@
 
 package org.apache.jackrabbit.oak.plugins.index.search;
 
+/**
+ * The version of an index (property "compatVersion").
+ *
+ * The default is version 2. Version 1 is supported for backward compatibility.
+ */
 public enum IndexFormatVersion {
     /**
      * Index conforming to Oak version up to 1.0.8

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexLookup.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexLookup.java?rev=1841291&r1=1841290&r2=1841291&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexLookup.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexLookup.java Wed Sep 19 06:46:42 2018
@@ -21,6 +21,7 @@ package org.apache.jackrabbit.oak.plugin
 
 import java.util.Collection;
 import java.util.Set;
+import java.util.function.Predicate;
 
 import com.google.common.collect.Sets;
 import org.apache.jackrabbit.oak.commons.PathUtils;
@@ -29,30 +30,38 @@ import org.apache.jackrabbit.oak.spi.sta
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 
 import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME;
-import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME;
 
+/**
+ * Determines which indexes can possibly be used for a certain query.
+ *
+ * For example, for a query of the form "/jcr:root/content//*", the indexes
+ * under "/" and the indexes under "/content" can be used.
+ */
 public class IndexLookup {
+
     private final NodeState root;
+    private final Predicate<NodeState> definitionPredicate;
 
-    public IndexLookup(NodeState root) {
+    public IndexLookup(NodeState root, Predicate<NodeState> definitionPredicate) {
         this.root = root;
+        this.definitionPredicate = definitionPredicate;
     }
 
-    public Collection<String> collectIndexNodePaths(Filter filter, String type){
-        return collectIndexNodePaths(filter, type, true);
+    public Collection<String> collectIndexNodePaths(Filter filter) {
+        return collectIndexNodePaths(filter, true);
     }
 
-    private Collection<String> collectIndexNodePaths(Filter filter, String type, boolean recurse){
+    public Collection<String> collectIndexNodePaths(Filter filter, boolean recurse) {
         Set<String> paths = Sets.newHashSet();
 
-        collectIndexNodePaths(root, type, "/", paths);
+        collectIndexNodePaths(root, "/", paths);
 
         if (recurse) {
             StringBuilder sb = new StringBuilder();
             NodeState nodeState = root;
             for (String element : PathUtils.elements(filter.getPath())) {
                 nodeState = nodeState.getChildNode(element);
-                collectIndexNodePaths(nodeState, type,
+                collectIndexNodePaths(nodeState,
                         sb.append("/").append(element).toString(),
                         paths);
             }
@@ -61,20 +70,15 @@ public class IndexLookup {
         return paths;
     }
 
-    public static void collectIndexNodePaths(NodeState nodeState, String type, String parentPath, Collection<String> paths) {
+    private void collectIndexNodePaths(NodeState nodeState, String parentPath, Collection<String> paths) {
         NodeState state = nodeState.getChildNode(INDEX_DEFINITIONS_NAME);
         for (ChildNodeEntry entry : state.getChildNodeEntries()) {
-            if (isIndexOfType(entry.getNodeState(), type)) {
+            if (definitionPredicate.test(entry.getNodeState())) {
                 paths.add(createIndexNodePath(parentPath, entry.getName()));
             }
         }
     }
 
-    private static boolean isIndexOfType(NodeState nodeState, String type) {
-        return type.equals(nodeState.getString(TYPE_PROPERTY_NAME));
-    }
-
-
     private static String createIndexNodePath(String parentPath, String name){
         return PathUtils.concat(parentPath, INDEX_DEFINITIONS_NAME, name);
     }

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexNode.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexNode.java?rev=1841291&r1=1841290&r2=1841291&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexNode.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexNode.java Wed Sep 19 06:46:42 2018
@@ -19,6 +19,13 @@
 
 package org.apache.jackrabbit.oak.plugins.index.search;
 
+/**
+ * Represents an instance of an index.
+ *
+ * It is typically acquired during the planning phase and the execution phase
+ * of a query, and released afterwards. This allows an implementation to re-use
+ * resources (e.g. keep files open).
+ */
 public interface IndexNode {
 
     void release();

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexStatistics.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexStatistics.java?rev=1841291&r1=1841290&r2=1841291&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexStatistics.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexStatistics.java Wed Sep 19 06:46:42 2018
@@ -19,7 +19,7 @@
 package org.apache.jackrabbit.oak.plugins.index.search;
 
 /**
- *
+ * Reports index statistics (for example, how many entries the index contains).
  */
 public interface IndexStatistics {
   int numDocs();

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java?rev=1841291&r1=1841290&r2=1841291&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java Wed Sep 19 06:46:42 2018
@@ -24,9 +24,9 @@ import javax.jcr.PropertyType;
 import org.apache.jackrabbit.oak.api.PropertyState;
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.plugins.index.property.ValuePattern;
 import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.IndexingRule;
 import org.apache.jackrabbit.oak.plugins.index.search.util.FunctionIndexProcessor;
-import org.apache.jackrabbit.oak.plugins.index.property.ValuePattern;
 import org.apache.jackrabbit.oak.plugins.index.search.util.IndexHelper;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.jetbrains.annotations.Nullable;
@@ -40,6 +40,7 @@ import static org.apache.jackrabbit.oak.
 import static org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants.FIELD_BOOST;
 import static org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants.PROP_IS_REGEX;
 import static org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants.PROP_WEIGHT;
+import static org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndexPlanner.DEFAULT_PROPERTY_WEIGHT;
 import static org.apache.jackrabbit.oak.plugins.index.search.util.ConfigUtil.getOptionalValue;
 
 public class PropertyDefinition {
@@ -58,19 +59,19 @@ public class PropertyDefinition {
      * property etc then it should be defined via 'name' property in NodeState.
      * In such case NodeState name can be set to anything
      */
-    final String name;
+    public final String name;
 
     private final int propertyType;
     /**
      * The boost value for a property.
      */
-    final float boost;
+    public final float boost;
 
-    final boolean isRegexp;
+    public final boolean isRegexp;
 
     public final boolean index;
 
-    final boolean stored;
+    public final boolean stored;
 
     public final boolean nodeScopeIndex;
 
@@ -82,7 +83,7 @@ public class PropertyDefinition {
 
     public final boolean nullCheckEnabled;
 
-    final boolean notNullCheckEnabled;
+    public final boolean notNullCheckEnabled;
 
     final int includedPropertyTypes;
 
@@ -123,12 +124,14 @@ public class PropertyDefinition {
 
     public final boolean unique;
 
+    public final boolean useInSimilarity;
+
     public PropertyDefinition(IndexingRule idxDefn, String nodeName, NodeState defn) {
         this.isRegexp = getOptionalValue(defn, PROP_IS_REGEX, false);
         this.name = getName(defn, nodeName);
         this.relative = isRelativeProperty(name);
         this.boost = getOptionalValue(defn, FIELD_BOOST, DEFAULT_BOOST);
-        this.weight = getOptionalValue(defn, PROP_WEIGHT, 5);
+        this.weight = getOptionalValue(defn, PROP_WEIGHT, DEFAULT_PROPERTY_WEIGHT);
 
         //By default if a property is defined it is indexed
         this.index = getOptionalValue(defn, FulltextIndexConstants.PROP_INDEX, true);
@@ -151,6 +154,7 @@ public class PropertyDefinition {
         this.propertyType = getPropertyType(idxDefn, nodeName, defn);
         this.useInSuggest = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_USE_IN_SUGGEST, false);
         this.useInSpellcheck = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_USE_IN_SPELLCHECK, false);
+        this.useInSimilarity = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_USE_IN_SIMILARITY, false);
         this.nullCheckEnabled = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_NULL_CHECK_ENABLED, false);
         this.notNullCheckEnabled = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_NOT_NULL_CHECK_ENABLED, false);
         this.excludeFromAggregate = getOptionalValueIfIndexed(defn, FulltextIndexConstants.PROP_EXCLUDE_FROM_AGGREGATE, false);

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ReindexOperations.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ReindexOperations.java?rev=1841291&r1=1841290&r2=1841291&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ReindexOperations.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/ReindexOperations.java Wed Sep 19 06:46:42 2018
@@ -19,6 +19,7 @@
 
 package org.apache.jackrabbit.oak.plugins.index.search;
 
+import org.apache.jackrabbit.oak.plugins.index.search.util.NodeStateCloner;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 
@@ -29,11 +30,14 @@ public class ReindexOperations {
     private final NodeState root;
     private final NodeBuilder definitionBuilder;
     private final String indexPath;
+    private final IndexDefinition.Builder indexDefBuilder;
 
-    public ReindexOperations(NodeState root, NodeBuilder definitionBuilder, String indexPath) {
+    public ReindexOperations(NodeState root, NodeBuilder definitionBuilder, String indexPath,
+                             IndexDefinition.Builder indexDefBuilder) {
         this.root = root;
         this.definitionBuilder = definitionBuilder;
         this.indexPath = indexPath;
+        this.indexDefBuilder = indexDefBuilder;
     }
 
     public IndexDefinition apply(boolean useStateFromBuilder) {
@@ -50,8 +54,10 @@ public class ReindexOperations {
         String uid = configureUniqueId(definitionBuilder);
 
         //Refresh the index definition based on update builder state
-        return IndexDefinition
-                .newBuilder(root, defnState, indexPath)
+        return indexDefBuilder
+                .root(root)
+                .defn(defnState)
+                .indexPath(indexPath)
                 .version(version)
                 .uid(uid)
                 .reindex()

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/SizeEstimator.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/SizeEstimator.java?rev=1841291&r1=1841290&r2=1841291&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/SizeEstimator.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/SizeEstimator.java Wed Sep 19 06:46:42 2018
@@ -18,13 +18,17 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.search;
 
+/**
+ * A size estimator, for example to estimate how many entries a result will
+ * have.
+ */
 public interface SizeEstimator {
 
     /**
      * Get the estimated size, or -1 if not known.
-     * 
+     *
      * @return the size
      */
     long getSize();
-    
+
 }

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/TextExtractionStatsMBean.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/TextExtractionStatsMBean.java?rev=1841291&r1=1841290&r2=1841291&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/TextExtractionStatsMBean.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/TextExtractionStatsMBean.java Wed Sep 19 06:46:42 2018
@@ -21,6 +21,9 @@ package org.apache.jackrabbit.oak.plugin
 
 import org.osgi.annotation.versioning.ProviderType;
 
+/**
+ * An MBean for text extraction statistics.
+ */
 @ProviderType
 public interface TextExtractionStatsMBean {
     String TYPE = "TextExtractionStats";

Added: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/BlobByteSource.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/BlobByteSource.java?rev=1841291&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/BlobByteSource.java (added)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/BlobByteSource.java Wed Sep 19 06:46:42 2018
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.search.spi.binary;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import com.google.common.io.ByteSource;
+import org.apache.jackrabbit.oak.api.Blob;
+
+/**
+ * A {@link ByteSource} that reads its content and length from an Oak {@link Blob}.
+ */
+public final class BlobByteSource extends ByteSource {
+    private final Blob blob;
+
+    public BlobByteSource(Blob blob) {
+        this.blob = blob;
+    }
+
+    @Override
+    public InputStream openStream() throws IOException {
+        return blob.getNewStream();
+    }
+
+    @Override
+    public long size() throws IOException {
+        return blob.length();
+    }
+
+    @Override
+    public boolean isEmpty() throws IOException {
+        return blob.length() == 0;
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/BlobByteSource.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java?rev=1841291&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java (added)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java Wed Sep 19 06:46:42 2018
@@ -0,0 +1,344 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.search.spi.binary;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.TimeoutException;
+
+import com.google.common.collect.Lists;
+import com.google.common.io.CountingInputStream;
+import org.apache.commons.io.IOUtils;
+import org.apache.jackrabbit.JcrConstants;
+import org.apache.jackrabbit.oak.api.Blob;
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.commons.io.LazyInputStream;
+import org.apache.jackrabbit.oak.plugins.index.fulltext.ExtractedText;
+import org.apache.jackrabbit.oak.plugins.index.search.ExtractedTextCache;
+import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
+import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexEditorContext;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.WriteOutContentHandler;
+import org.jetbrains.annotations.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.SAXException;
+
+import static org.apache.jackrabbit.JcrConstants.JCR_DATA;
+import static org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexEditor.TEXT_EXTRACTION_ERROR;
+
+/**
+ * Extracts text from binary property values using Tika, backed by an {@link ExtractedTextCache}.
+ */
+public class FulltextBinaryTextExtractor {
+
+  private static final Logger log = LoggerFactory.getLogger(FulltextBinaryTextExtractor.class);
+  private static final Parser defaultParser = createDefaultParser();
+  private static final long SMALL_BINARY = Long.getLong("oak.search.smallBinary", 16 * 1024);
+  private final TextExtractionStats textExtractionStats = new TextExtractionStats();
+  private final ExtractedTextCache extractedTextCache;
+  private final IndexDefinition definition;
+  private final boolean reindex;
+  private Parser parser;
+  private TikaConfigHolder tikaConfig;
+  /**
+   * The media types supported by the parser used.
+   */
+  private Set<MediaType> supportedMediaTypes;
+  private Set<MediaType> nonIndexedMediaType;
+
+  public FulltextBinaryTextExtractor(ExtractedTextCache extractedTextCache, IndexDefinition definition, boolean reindex) {
+    this.extractedTextCache = extractedTextCache;
+    this.definition = definition;
+    this.reindex = reindex;
+  }
+
+  public void done(boolean reindex){
+    textExtractionStats.log(reindex);
+    textExtractionStats.collectStats(extractedTextCache);
+  }
+
+  public List<String> newBinary(
+      PropertyState property, NodeState state, String path) {
+    List<String> values = Lists.newArrayList();
+    Metadata metadata = new Metadata();
+
+    //jcr:mimeType is mandatory for a binary to be indexed
+    String type = state.getString(JcrConstants.JCR_MIMETYPE);
+    type = definition.getTikaMappedMimeType(type);
+
+    if (type == null || !isSupportedMediaType(type)) {
+      log.trace(
+          "[{}] Ignoring binary content for node {} due to unsupported (or null) jcr:mimeType [{}]",
+          getIndexName(), path, type);
+      return values;
+    }
+
+    metadata.set(Metadata.CONTENT_TYPE, type);
+    if (JCR_DATA.equals(property.getName())) {
+      String encoding = state.getString(JcrConstants.JCR_ENCODING);
+      if (encoding != null) { // not mandatory
+        metadata.set(Metadata.CONTENT_ENCODING, encoding);
+      }
+    }
+
+    for (Blob v : property.getValue(Type.BINARIES)) {
+      String value = parseStringValue(v, metadata, path, property.getName());
+      if (value == null){
+        continue;
+      }
+
+      values.add(value);
+    }
+    return values;
+  }
+
+  private String parseStringValue(Blob v, Metadata metadata, String path, String propertyName) {
+    String text = extractedTextCache.get(path, propertyName, v, reindex);
+    if (text == null){
+      text = parseStringValue0(v, metadata, path);
+    }
+    return text;
+  }
+
+  private String parseStringValue0(Blob v, Metadata metadata, String path) {
+    WriteOutContentHandler handler = new WriteOutContentHandler(definition.getMaxExtractLength());
+    long start = System.currentTimeMillis();
+    long bytesRead = 0;
+    long length = v.length();
+    if (log.isDebugEnabled()) {
+      log.debug("Extracting {}, {} bytes, id {}", path, length, v.getContentIdentity());
+    }
+    try {
+      CountingInputStream stream = new CountingInputStream(new LazyInputStream(new BlobByteSource(v)));
+      try {
+        if (length > SMALL_BINARY) {
+          String name = "Extracting " + path + ", " + length + " bytes";
+          extractedTextCache.process(name, new Callable<Void>() {
+            @Override
+            public Void call() throws Exception {
+              getParser().parse(stream, handler, metadata, new ParseContext());
+              return null;
+            }
+          });
+        } else {
+          getParser().parse(stream, handler, metadata, new ParseContext());
+        }
+      } finally {
+        bytesRead = stream.getCount();
+        stream.close();
+      }
+    } catch (LinkageError e) {
+      // Capture errors caused by extraction libraries
+      // not being present. This is equivalent to disabling
+      // selected media types in configuration, so we can simply
+      // ignore these errors.
+      log.debug(
+          "[{}] Failed to extract text from a binary property: {}."
+              + " This often happens when some media types are disabled by configuration."
+              + " The stack trace is included to flag some 'unintended' failures",
+          getIndexName(), path, e);
+      extractedTextCache.put(v, ExtractedText.ERROR);
+      return TEXT_EXTRACTION_ERROR;
+    } catch (TimeoutException t) {
+      log.warn(
+          "[{}] Failed to extract text from a binary property due to timeout: {}.",
+          getIndexName(), path);
+      extractedTextCache.put(v, ExtractedText.ERROR);
+      extractedTextCache.putTimeout(v, ExtractedText.ERROR);
+      return TEXT_EXTRACTION_ERROR;
+    } catch (Throwable t) {
+      // Capture and report any other full text extraction problems.
+      // The special STOP exception is used for normal termination.
+      if (!handler.isWriteLimitReached(t)) {
+        log.debug(
+            "[{}] Failed to extract text from a binary property: {}."
+                + " This is a fairly common case, and nothing to"
+                + " worry about. The stack trace is included to"
+                + " help improve the text extraction feature.",
+            getIndexName(), path, t);
+        extractedTextCache.put(v, ExtractedText.ERROR);
+        return TEXT_EXTRACTION_ERROR;
+      } else {
+        log.debug("Extracted text size exceeded configured limit({})", definition.getMaxExtractLength());
+      }
+    }
+    String result = handler.toString();
+    if (bytesRead > 0) {
+      long time = System.currentTimeMillis() - start;
+      int len = result.length();
+      recordTextExtractionStats(time, bytesRead, len);
+      if (log.isDebugEnabled()) {
+        log.debug("Extracting {} took {} ms, {} bytes read, {} text size",
+            path, time, bytesRead, len);
+      }
+    }
+    extractedTextCache.put(v,  new ExtractedText(ExtractedText.ExtractionResult.SUCCESS, result));
+    return result;
+  }
+
+  private void recordTextExtractionStats(long timeInMillis, long bytesRead, int textLength) {
+    textExtractionStats.addStats(timeInMillis, bytesRead, textLength);
+  }
+
+  private String getIndexName() {
+    return definition.getIndexName();
+  }
+
+  //~-------------------------------------------< Tika >
+
+  public TikaConfig getTikaConfig(){
+    if (tikaConfig == null) {
+      tikaConfig = initializeTikaConfig(definition);
+    }
+    return tikaConfig.config;
+  }
+
+  private Parser getParser() {
+    if (parser == null){
+      parser = initializeTikaParser(definition);
+    }
+    return parser;
+  }
+
+  private boolean isSupportedMediaType(String type) {
+    if (supportedMediaTypes == null) {
+      supportedMediaTypes = getParser().getSupportedTypes(new ParseContext());
+      nonIndexedMediaType = getNonIndexedMediaTypes();
+    }
+    MediaType mediaType = MediaType.parse(type);
+    return supportedMediaTypes.contains(mediaType) && !nonIndexedMediaType.contains(mediaType);
+  }
+
+  private Set<MediaType> getNonIndexedMediaTypes() {
+    InputStream configStream = null;
+    String configSource = null;
+    try {
+      if (definition.hasCustomTikaConfig()) {
+        configSource = String.format("Custom config at %s", definition.getIndexPath());
+        configStream = definition.getTikaConfig();
+      } else {
+        URL configUrl = FulltextIndexEditorContext.class.getResource("tika-config.xml");
+        configSource = "Default : tika-config.xml";
+        if (configUrl != null) {
+          configStream = configUrl.openStream();
+        }
+      }
+
+      if (configStream != null) {
+        return TikaParserConfig.getNonIndexedMediaTypes(configStream);
+      }
+    } catch (TikaException | IOException | SAXException e) {
+      log.warn("Tika configuration not available : " + configSource, e);
+    } finally {
+      IOUtils.closeQuietly(configStream);
+    }
+    return Collections.emptySet();
+  }
+
+
+  private static TikaConfigHolder initializeTikaConfig(@Nullable IndexDefinition definition) {
+    ClassLoader current = Thread.currentThread().getContextClassLoader();
+    InputStream configStream = null;
+    String configSource = null;
+
+    try {
+      Thread.currentThread().setContextClassLoader(FulltextIndexEditorContext.class.getClassLoader());
+      if (definition != null && definition.hasCustomTikaConfig()) {
+        log.debug("[{}] Using custom tika config", definition.getIndexName());
+        configSource = "Custom config at " + definition.getIndexPath();
+        configStream = definition.getTikaConfig();
+      } else {
+        URL configUrl = FulltextIndexEditorContext.class.getResource("tika-config.xml");
+        if (configUrl != null) {
+          configSource = configUrl.toString();
+          configStream = configUrl.openStream();
+        }
+      }
+
+      if (configStream != null) {
+        return new TikaConfigHolder(new TikaConfig(configStream), configSource);
+      }
+    } catch (TikaException | IOException | SAXException e) {
+      log.warn("Tika configuration not available : " + configSource, e);
+    } finally {
+      IOUtils.closeQuietly(configStream);
+      Thread.currentThread().setContextClassLoader(current);
+    }
+    return new TikaConfigHolder(TikaConfig.getDefaultConfig(), "Default Config");
+  }
+
+  private Parser initializeTikaParser(IndexDefinition definition) {
+    ClassLoader current = Thread.currentThread().getContextClassLoader();
+    try {
+      if (definition.hasCustomTikaConfig()) {
+        Thread.currentThread().setContextClassLoader(FulltextIndexEditorContext.class.getClassLoader());
+        return new AutoDetectParser(getTikaConfig());
+      }
+    } finally {
+      Thread.currentThread().setContextClassLoader(current);
+    }
+    return defaultParser;
+  }
+
+  private static AutoDetectParser createDefaultParser() {
+    ClassLoader current = Thread.currentThread().getContextClassLoader();
+    TikaConfigHolder configHolder = null;
+    try {
+      configHolder = initializeTikaConfig(null);
+      Thread.currentThread().setContextClassLoader(FulltextIndexEditorContext.class.getClassLoader());
+      log.info("Loaded default Tika Config from classpath {}", configHolder);
+      return new AutoDetectParser(configHolder.config);
+    } catch (Exception e) {
+      log.warn("Tika configuration not available : " + configHolder, e);
+    } finally {
+      Thread.currentThread().setContextClassLoader(current);
+    }
+    return new AutoDetectParser();
+  }
+
+  private static final class TikaConfigHolder{
+    final TikaConfig config;
+    final String sourceInfo;
+
+    public TikaConfigHolder(TikaConfig config, String sourceInfo) {
+      this.config = config;
+      this.sourceInfo = sourceInfo;
+    }
+
+    @Override
+    public String toString() {
+      return sourceInfo;
+    }
+  }
+
+}

Propchange: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/FulltextBinaryTextExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/TextExtractionStats.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/TextExtractionStats.java?rev=1841291&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/TextExtractionStats.java (added)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/TextExtractionStats.java Wed Sep 19 06:46:42 2018
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.search.spi.binary;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.jackrabbit.oak.plugins.index.search.ExtractedTextCache;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.jackrabbit.oak.commons.IOUtils.humanReadableByteCount;
+
+/**
+ * Running totals for text extraction work: number of extractions, time spent,
+ * bytes read and extracted text length. Fields are updated without synchronization.
+ */
+class TextExtractionStats {
+    private static final Logger log = LoggerFactory.getLogger(TextExtractionStats.class);
+    /** Total time in millis (1 minute) to exceed for INFO logging when not reindexing. */
+    private static final long LOGGING_THRESHOLD = TimeUnit.MINUTES.toMillis(1);
+    private int count;
+    private long totalTime;
+    private long totalBytesRead;
+    private long totalTextLength;
+
+    /** Records one extraction: increments the count and adds the values to the totals. */
+    public void addStats(long timeInMillis, long bytesRead, int textLength) {
+        count += 1;
+        totalTime += timeInMillis;
+        totalBytesRead += bytesRead;
+        totalTextLength += textLength;
+    }
+
+    /**
+     * Logs the stats at DEBUG if debug is enabled. Otherwise logs at INFO, provided something
+     * was recorded and either {@code reindex} is true or the total time is above the threshold.
+     */
+    public void log(boolean reindex) {
+        if (log.isDebugEnabled()) {
+            log.debug("Text extraction stats {}", this);
+            return;
+        }
+        boolean worthReporting = reindex || totalTime > LOGGING_THRESHOLD;
+        if (count > 0 && worthReporting) {
+            log.info("Text extraction stats {}", this);
+        }
+    }
+
+    /** Passes the count, total time, bytes read and text length on to {@code cache}. */
+    public void collectStats(ExtractedTextCache cache) {
+        cache.addStats(count, totalTime, totalBytesRead, totalTextLength);
+    }
+
+    @Override
+    public String toString() {
+        final String template = " %d (Time Taken %s, Bytes Read %s, Extracted text size %s)";
+        return String.format(template, count, timeInWords(totalTime),
+                humanReadableByteCount(totalBytesRead), humanReadableByteCount(totalTextLength));
+    }
+
+    /** Formats a millisecond duration as whole minutes plus the remaining whole seconds. */
+    private static String timeInWords(long millis) {
+        long minutes = TimeUnit.MILLISECONDS.toMinutes(millis);
+        long seconds = TimeUnit.MILLISECONDS.toSeconds(millis) - TimeUnit.MINUTES.toSeconds(minutes);
+        return String.format("%d min, %d sec", minutes, seconds);
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/TextExtractionStats.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/TikaParserConfig.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/TikaParserConfig.java?rev=1841291&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/TikaParserConfig.java (added)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/TikaParserConfig.java Wed Sep 19 06:46:42 2018
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.index.search.spi.binary;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.Set;
+
+import javax.xml.parsers.DocumentBuilder;
+
+import com.google.common.base.Strings;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+public class TikaParserConfig {
+    private static final String EMPTY_PARSER = "org.apache.tika.parser.EmptyParser";
+
+    /**
+     * Collects the media types that a Tika configuration assigns to the EmptyParser. Entries are
+     * read only when exactly one {@code parsers} element exists below the root; else the set is empty.
+     * @param configStream stream for tika config; not explicitly closed by this method
+     * @return set of MediaTypes which are not indexed, possibly empty
+     * @throws TikaException if Tika cannot supply a document builder
+     * @throws IOException if the stream cannot be read
+     * @throws SAXException if the stream cannot be parsed as XML
+     */
+    public static Set<MediaType> getNonIndexedMediaTypes(InputStream configStream) throws
+            TikaException, IOException, SAXException {
+        Set<MediaType> nonIndexed = new HashSet<>();
+        Element root = getBuilder().parse(configStream).getDocumentElement();
+        NodeList parsersElements = root.getElementsByTagName("parsers");
+        if (parsersElements.getLength() != 1) {
+            return nonIndexed;
+        }
+        NodeList children = parsersElements.item(0).getChildNodes();
+        for (int i = 0; i < children.getLength(); i++) {
+            Node child = children.item(i);
+            if (!(child instanceof Element)) {
+                continue; // only element children can carry a parser class attribute
+            }
+            Element parser = (Element) child;
+            if (EMPTY_PARSER.equals(parser.getAttribute("class"))) {
+                parseMimeTypes(nonIndexed, parser.getElementsByTagName("mime"));
+            }
+        }
+        return nonIndexed;
+    }
+
+    /**
+     * Adds the media type of every non-empty {@code mime} element to {@code result};
+     * values for which {@link MediaType#parse(String)} returns null are skipped.
+     */
+    private static void parseMimeTypes(Set<MediaType> result, NodeList mimes) {
+        /* Shape of the configuration fragment being read:
+        <parser class="org.apache.tika.parser.EmptyParser">
+            <mime>application/x-archive</mime>
+            <mime>application/x-bzip</mime>
+        </parser>
+        */
+        for (int j = 0; j < mimes.getLength(); j++) {
+            Node mime = mimes.item(j);
+            String text = mime instanceof Element ? Strings.emptyToNull(mime.getTextContent()) : null;
+            MediaType mediaType = text == null ? null : MediaType.parse(text.trim());
+            if (mediaType != null) {
+                result.add(mediaType);
+            }
+        }
+    }
+
+    /** Obtains the XML document builder from a fresh Tika {@link ParseContext}. */
+    private static DocumentBuilder getBuilder() throws TikaException {
+        return new ParseContext().getDocumentBuilder();
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/binary/TikaParserConfig.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java?rev=1841291&r1=1841290&r2=1841291&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java Wed Sep 19 06:46:42 2018
@@ -23,28 +23,30 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
-import java.util.Map;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import javax.jcr.PropertyType;
 
 import com.google.common.collect.Iterables;
+import org.apache.jackrabbit.oak.api.Blob;
 import org.apache.jackrabbit.oak.api.PropertyState;
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.commons.PathUtils;
-import org.jetbrains.annotations.Nullable;
-
 import org.apache.jackrabbit.oak.plugins.index.search.Aggregate;
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
 import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
+import org.apache.jackrabbit.oak.plugins.index.search.spi.binary.FulltextBinaryTextExtractor;
 import org.apache.jackrabbit.oak.plugins.index.search.util.FunctionIndexProcessor;
 import org.apache.jackrabbit.oak.plugins.memory.StringPropertyState;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static com.google.common.base.Preconditions.checkNotNull;
 import static org.apache.jackrabbit.oak.commons.PathUtils.getName;
-
 import static org.apache.jackrabbit.oak.plugins.index.search.util.ConfigUtil.getPrimaryTypeName;
 
 /**
@@ -56,44 +58,52 @@ public abstract class FulltextDocumentMa
 
     private final Logger log = LoggerFactory.getLogger(getClass());
 
-    private FulltextBinaryTextExtractor textExtractor;
-    private IndexDefinition definition;
-    private IndexDefinition.IndexingRule indexingRule;
-    private String path;
+    protected final FulltextBinaryTextExtractor textExtractor;
+    protected final IndexDefinition definition;
+    protected final IndexDefinition.IndexingRule indexingRule;
+    protected final String path;
+
+    public FulltextDocumentMaker(@Nullable FulltextBinaryTextExtractor textExtractor,
+                               @NotNull IndexDefinition definition,
+                               @NotNull IndexDefinition.IndexingRule indexingRule,
+                               @NotNull String path) {
+        this.textExtractor = textExtractor;
+        this.definition = checkNotNull(definition);
+        this.indexingRule = checkNotNull(indexingRule);
+        this.path = checkNotNull(path);
+    }
 
     protected abstract D initDoc();
 
-    protected abstract D finalizeDoc(D fields, boolean dirty, boolean facet);
-
-    protected abstract StringPropertyState createNodeNamePS();
+    protected abstract D finalizeDoc(D fields, boolean dirty, boolean facet) throws IOException;
 
     protected abstract boolean isFacetingEnabled();
 
-    protected abstract boolean isNodeName(String pname);
-
-    protected abstract boolean indexTypeOrderedFields(String pname, int tag, PropertyState property, PropertyDefinition pd);
+    protected abstract boolean indexTypeOrderedFields(D doc, String pname, int tag, PropertyState property, PropertyDefinition pd);
 
-    protected abstract boolean addBinary(D doc, Map<String, String> binaryMap);
+    protected abstract boolean addBinary(D doc, String path, List<String> binaryValues);
 
     protected abstract boolean indexFacetProperty(D doc, int tag, PropertyState property, String pname);
 
-    protected abstract boolean indexAnalyzedProperty(D doc, String pname, String value, PropertyDefinition pd);
+    protected abstract void indexAnalyzedProperty(D doc, String pname, String value, PropertyDefinition pd);
 
-    protected abstract boolean indexSuggestValue(D doc, String value);
+    protected abstract void indexSuggestValue(D doc, String value);
 
-    protected abstract boolean indexSpellcheckValue(D doc, String value);
+    protected abstract void indexSpellcheckValue(D doc, String value);
 
-    protected abstract boolean indexFulltextValue(D doc, String value);
+    protected abstract void indexFulltextValue(D doc, String value);
 
     protected abstract boolean indexTypedProperty(D doc, PropertyState property, String pname, PropertyDefinition pd);
 
-    protected abstract boolean indexNotNullProperty(D doc, PropertyDefinition pd);
+    protected abstract void indexAncestors(D doc, String path);
 
-    protected abstract boolean indexNullProperty(D doc, PropertyDefinition pd);
+    protected abstract void indexNotNullProperty(D doc, PropertyDefinition pd);
 
-    protected abstract boolean indexAggregateValue(D doc, Aggregate.NodeIncludeResult result, String value, PropertyDefinition pd);
+    protected abstract void indexNullProperty(D doc, PropertyDefinition pd);
 
-    protected abstract boolean indexNodeName(D doc, String value);
+    protected abstract void indexAggregateValue(D doc, Aggregate.NodeIncludeResult result, String value, PropertyDefinition pd);
+
+    protected abstract void indexNodeName(D doc, String value);
 
     @Nullable
     public D makeDocument(NodeState state) throws IOException {
@@ -109,11 +119,12 @@ public abstract class FulltextDocumentMa
 
         //We 'intentionally' are indexing node names only on root state as we don't support indexing relative or
         //regex for node name indexing
-        PropertyState nodenamePS = createNodeNamePS();
+        PropertyState nodenamePS =
+                new StringPropertyState(FieldNames.NODE_NAME, getName(path));
         for (PropertyState property : Iterables.concat(state.getProperties(), Collections.singleton(nodenamePS))) {
             String pname = property.getName();
 
-            if (!isVisible(pname) && !isNodeName(pname)) {
+            if (!isVisible(pname) && !FieldNames.NODE_NAME.equals(pname)) {
                 continue;
             }
 
@@ -163,6 +174,14 @@ public abstract class FulltextDocumentMa
             return null;
         }
 
+        if (indexingRule.isFulltextEnabled()) {
+            indexFulltextValue(document, name);
+        }
+
+        if (definition.evaluatePathRestrictions()){
+            indexAncestors(document, path);
+        }
+
         return finalizeDoc(document, dirty, facet);
     }
 
@@ -191,10 +210,18 @@ public abstract class FulltextDocumentMa
         boolean includeTypeForFullText = indexingRule.includePropertyType(property.getType().tag());
 
         boolean dirty = false;
-        if (Type.BINARY.tag() == property.getType().tag()
+        if (Type.BINARY.tag() == property.getType().tag() && pd.useInSimilarity) {
+            try {
+                log.trace("indexing similarity binaries for {}", pd.name);
+                indexSimilarityBinaries(doc, pd, property.getValue(Type.BINARY));
+                dirty = true;
+            } catch (Exception e) {
+                log.error("could not index similarity field for property {} and definition {}", property, pd);
+            }
+        } else if (Type.BINARY.tag() == property.getType().tag()
                 && includeTypeForFullText) {
-            Map<String, String> binaryMap = newBinary(property, state, null, path + "@" + pname);
-            addBinary(doc, binaryMap);
+            List<String> binaryValues = newBinary(property, state, path + "@" + pname);
+            addBinary(doc, null, binaryValues);
             dirty = true;
         } else {
             if (pd.propertyIndex && pd.includePropertyType(property.getType().tag())) {
@@ -222,6 +249,15 @@ public abstract class FulltextDocumentMa
 
                     if (pd.nodeScopeIndex) {
                         indexFulltextValue(doc, value);
+                        if (pd.useInSimilarity) {
+                            log.trace("indexing similarity strings for {}", pd.name);
+                            try {
+                                // fallback for when feature vectors are written in string typed properties
+                                indexSimilarityStrings(doc, pd, value);
+                            } catch (Exception e) {
+                                log.error("could not index similarity field for property {} and definition {}", property, pd);
+                            }
+                        }
                     }
                     dirty = true;
                 }
@@ -235,6 +271,10 @@ public abstract class FulltextDocumentMa
         return dirty;
     }
 
+    protected abstract void indexSimilarityBinaries(D doc, PropertyDefinition pd, Blob blob) throws IOException;
+
+    protected abstract void indexSimilarityStrings(D doc, PropertyDefinition pd, String value) throws IOException;
+
     private boolean addTypedFields(D doc, PropertyState property, String pname, PropertyDefinition pd) {
         return indexTypedProperty(doc, property, pname, pd);
     }
@@ -264,7 +304,7 @@ public abstract class FulltextDocumentMa
                     Type.fromTag(tag, false), path);
             tag = idxDefinedTag;
         }
-        return indexTypeOrderedFields(pname, tag, property, pd);
+        return indexTypeOrderedFields(doc, pname, tag, property, pd);
     }
 
     protected boolean includePropertyValue(PropertyState property, int i, PropertyDefinition pd) {
@@ -287,22 +327,20 @@ public abstract class FulltextDocumentMa
         return name.charAt(0) != ':';
     }
 
-    private Map<String,String> newBinary(
-            PropertyState property, NodeState state, String nodePath, String path) {
+    private List<String> newBinary(
+        PropertyState property, NodeState state, String path) {
         if (textExtractor == null){
             //Skip text extraction for sync indexing
-            return Collections.emptyMap();
+            return Collections.emptyList();
         }
 
-        return textExtractor.newBinary(property, state, nodePath, path);
+        return textExtractor.newBinary(property, state, path);
     }
 
-    private boolean augmentCustomFields(final String path, final D doc,
-                                        final NodeState document) {
-        boolean dirty = false;
-
-        // TODO : extract more generic SPI for augmentor factory
-
+    // TODO : extract more generic SPI for augmentor factory
+    protected abstract boolean augmentCustomFields(final String path, final D doc, final NodeState document);// {
+//        boolean dirty = false;
+//
 //        if (augmentorFactory != null) {
 //            Iterable<Field> augmentedFields = augmentorFactory
 //                    .getIndexFieldProvider(indexingRule.getNodeTypeName())
@@ -313,9 +351,9 @@ public abstract class FulltextDocumentMa
 //                dirty = true;
 //            }
 //        }
-
-        return dirty;
-    }
+//
+//        return dirty;
+//    }
 
     //~-------------------------------------------------------< NullCheck Support >
 
@@ -323,7 +361,8 @@ public abstract class FulltextDocumentMa
         boolean fieldAdded = false;
         for (PropertyDefinition pd : indexingRule.getNotNullCheckEnabledProperties()) {
             if (isPropertyNotNull(state, pd)) {
-                fieldAdded = indexNotNullProperty(doc, pd);
+                indexNotNullProperty(doc, pd);
+                fieldAdded = true;
             }
         }
         return fieldAdded;
@@ -334,7 +373,8 @@ public abstract class FulltextDocumentMa
         boolean fieldAdded = false;
         for (PropertyDefinition pd : indexingRule.getNullCheckEnabledProperties()) {
             if (isPropertyNull(state, pd)) {
-                fieldAdded = indexNullProperty(doc, pd);
+                indexNullProperty(doc, pd);
+                fieldAdded = true;
             }
         }
         return fieldAdded;
@@ -513,8 +553,8 @@ public abstract class FulltextDocumentMa
                 //Here the fulltext is being created for aggregate root hence nodePath passed
                 //should be null
                 String nodePath = result.isRelativeNode() ? result.rootIncludePath : null;
-                Map<String, String> stringStringMap = newBinary(property, result.nodeState, nodePath, aggreagtedNodePath + "@" + pname);
-                addBinary(doc, stringStringMap);
+                List<String> binaryValues = newBinary(property, result.nodeState, aggreagtedNodePath + "@" + pname);
+                addBinary(doc, nodePath, binaryValues);
                 dirty = true;
             } else {
                 PropertyDefinition pd = null;
@@ -527,14 +567,15 @@ public abstract class FulltextDocumentMa
                 }
 
                 for (String value : property.getValue(Type.STRINGS)) {
-                    dirty = indexAggregateValue(doc, result, value, pd);
+                    indexAggregateValue(doc, result, value, pd);
+                    dirty = true;
                 }
             }
         }
         return dirty;
     }
 
-    private String getIndexName() {
+    protected String getIndexName() {
         return definition.getIndexName();
     }
 

Modified: jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextIndexEditor.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextIndexEditor.java?rev=1841291&r1=1841290&r2=1841291&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextIndexEditor.java (original)
+++ jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextIndexEditor.java Wed Sep 19 06:46:42 2018
@@ -66,7 +66,7 @@ public class FulltextIndexEditor<D> impl
 
   private boolean propertiesChanged = false;
 
-  private List<PropertyState> propertiesModified = Lists.newArrayList();
+  private final List<PropertyState> propertiesModified = Lists.newArrayList();
 
   /**
    * Flag indicating if the current tree being traversed has a deleted parent.
@@ -81,7 +81,7 @@ public class FulltextIndexEditor<D> impl
 
   private final PathFilter.Result pathFilterResult;
 
-  FulltextIndexEditor(FulltextIndexEditorContext<D> context) throws CommitFailedException {
+  public FulltextIndexEditor(FulltextIndexEditorContext<D> context) throws CommitFailedException {
     this.parent = null;
     this.name = null;
     this.path = "/";
@@ -91,10 +91,10 @@ public class FulltextIndexEditor<D> impl
     this.pathFilterResult = context.getDefinition().getPathFilter().filter(PathUtils.ROOT_PATH);
   }
 
-  private FulltextIndexEditor(FulltextIndexEditor<D> parent, String name,
-                            MatcherState matcherState,
-                            PathFilter.Result pathFilterResult,
-                            boolean isDeleted) {
+  public FulltextIndexEditor(FulltextIndexEditor<D> parent, String name,
+                             MatcherState matcherState,
+                             PathFilter.Result pathFilterResult,
+                             boolean isDeleted) {
     this.parent = parent;
     this.name = name;
     this.path = null;