You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ng...@apache.org on 2020/05/21 08:56:03 UTC

svn commit: r1877992 [1/2] - in /jackrabbit/oak/trunk: oak-search-elastic/ oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/fa...

Author: ngupta
Date: Thu May 21 08:56:02 2020
New Revision: 1877992

URL: http://svn.apache.org/viewvc?rev=1877992&view=rev
Log:
OAK-9061|oak-search-elastic: mapping consistent with index definition (committing patch by Fabrizio)

Added:
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java   (with props)
    jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java   (with props)
    jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelperTest.java   (with props)
Modified:
    jackrabbit/oak/trunk/oak-search-elastic/pom.xml
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java
    jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java
    jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java
    jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchFacetTest.java
    jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchFullTextAsyncTest.java
    jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchPropertyIndexTest.java
    jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriterTest.java
    jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java

Modified: jackrabbit/oak/trunk/oak-search-elastic/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/pom.xml?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/pom.xml Thu May 21 08:56:02 2020
@@ -239,7 +239,7 @@
     <dependency>
       <groupId>org.hamcrest</groupId>
       <artifactId>hamcrest-core</artifactId>
-      <version>1.3</version>
+      <version>2.2</version>
       <scope>test</scope>
     </dependency>
     <dependency>

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java Thu May 21 08:56:02 2020
@@ -18,11 +18,17 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.elasticsearch;
 
+import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.commons.PathUtils;
 import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
 import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
+import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
@@ -56,6 +62,16 @@ public class ElasticsearchIndexDefinitio
             .map(Object::toString)
             .collect(Collectors.joining("")));
 
+    private static final Function<Integer, Boolean> isAnalyzable;
+
+    static {
+        int[] NOT_ANALYZED_TYPES = new int[] {
+                Type.BINARY.tag(), Type.LONG.tag(), Type.DOUBLE.tag(), Type.DECIMAL.tag(), Type.DATE.tag(), Type.BOOLEAN.tag()
+        };
+        Arrays.sort(NOT_ANALYZED_TYPES); // needed for binary search
+        isAnalyzable = type -> Arrays.binarySearch(NOT_ANALYZED_TYPES, type) < 0;
+    }
+
     private final String remoteIndexName;
 
     public final int bulkActions;
@@ -66,6 +82,8 @@ public class ElasticsearchIndexDefinitio
     private final String indexPrefix;
     private final String remoteAlias;
 
+    private final Map<String, List<PropertyDefinition>> propertiesByName;
+
     public ElasticsearchIndexDefinition(NodeState root, NodeState defn, String indexPath, String indexPrefix) {
         super(root, getIndexDefinitionState(defn), determineIndexFormatVersion(defn), determineUniqueId(defn), indexPath);
         boolean isReindex = defn.getBoolean(IndexConstants.REINDEX_PROPERTY_NAME);
@@ -78,6 +96,12 @@ public class ElasticsearchIndexDefinitio
         this.bulkFlushIntervalMs = getOptionalValue(defn, BULK_FLUSH_INTERVAL_MS, BULK_FLUSH_INTERVAL_MS_DEFAULT);
         this.bulkRetries = getOptionalValue(defn, BULK_RETRIES, BULK_RETRIES_DEFAULT);
         this.bulkRetriesBackoff = getOptionalValue(defn, BULK_RETRIES_BACKOFF, BULK_RETRIES_BACKOFF_DEFAULT);
+
+        this.propertiesByName = getDefinedRules()
+                .stream()
+                .flatMap(rule -> StreamSupport.stream(rule.getProperties().spliterator(), false))
+                .filter(pd -> pd.index) // keep only properties that can be indexed
+                .collect(Collectors.groupingBy(pd -> pd.name));
     }
 
     /**
@@ -99,6 +123,35 @@ public class ElasticsearchIndexDefinitio
         return remoteIndexName;
     }
 
+    public Map<String, List<PropertyDefinition>> getPropertiesByName() {
+        return propertiesByName;
+    }
+
+    /**
+     * Returns the keyword field name mapped in Elasticsearch for the specified property name.
+     * @param propertyName the property name in the index rules
+     * @return the field name identifier in Elasticsearch
+     * @throws IllegalArgumentException if the specified name is not part of this {@code ElasticsearchIndexDefinition}
+     */
+    public String getElasticKeyword(String propertyName) {
+        List<PropertyDefinition> propertyDefinitions = propertiesByName.get(propertyName);
+        if (propertyDefinitions == null) {
+            throw new IllegalArgumentException(propertyName + " is not part of this ElasticsearchIndexDefinition");
+        }
+
+        String field = propertyName;
+        // it's ok to look at the first property since we are sure they all have the same type
+        int type = propertyDefinitions.get(0).getType();
+        if (isAnalyzable.apply(type) && isAnalyzed(propertyDefinitions)) {
+            field += ".keyword";
+        }
+        return field;
+    }
+
+    public boolean isAnalyzed(List<PropertyDefinition> propertyDefinitions) {
+        return propertyDefinitions.stream().anyMatch(pd -> pd.analyzed || pd.fulltextEnabled());
+    }
+
     private String setupAlias() {
         // TODO: implement advanced remote index name strategy that takes into account multiple tenants and re-index process
         return getESSafeIndexName(indexPrefix + "." + getIndexPath());

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java Thu May 21 08:56:02 2020
@@ -20,17 +20,17 @@ package org.apache.jackrabbit.oak.plugin
 
 import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchIndexNode;
 import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
 import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.SecureFacetConfiguration;
 import org.apache.jackrabbit.oak.spi.query.Filter;
 import org.apache.jackrabbit.oak.spi.query.QueryIndex;
 import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.search.SearchHit;
 
-import java.io.UnsupportedEncodingException;
-import java.net.URLDecoder;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Map;
 
 public class ElasticFacetHelper {
 
@@ -58,32 +58,28 @@ public class ElasticFacetHelper {
         return elasticsearchFacets;
     }
 
-    public static List<String> getAccessibleDocIds(SearchHit[] searchHits, Filter filter) throws UnsupportedEncodingException {
+    public static List<String> getAccessibleDocIds(SearchHit[] searchHits, Filter filter) {
         List<String> accessibleDocs = new LinkedList<>();
         for (SearchHit searchHit : searchHits) {
-            String id = searchHit.getId();
-            String path = idToPath(id);
+            final Map<String, Object> sourceMap = searchHit.getSourceAsMap();
+            String path = (String) sourceMap.get(FieldNames.PATH);
             if (filter.isAccessible(path)) {
-                accessibleDocs.add(id);
+                accessibleDocs.add(path);
             }
         }
         return accessibleDocs;
     }
 
-    public static int getAccessibleDocCount(Iterator<SearchHit> searchHitIterator, Filter filter) throws UnsupportedEncodingException {
+    public static int getAccessibleDocCount(Iterator<SearchHit> searchHitIterator, Filter filter) {
         int count = 0;
         while (searchHitIterator.hasNext()) {
             SearchHit searchHit = searchHitIterator.next();
-            String id = searchHit.getId();
-            String path = idToPath(id);
+            final Map<String, Object> sourceMap = searchHit.getSourceAsMap();
+            String path = (String) sourceMap.get(FieldNames.PATH);
             if (filter.isAccessible(path)) {
                 count++;
             }
         }
         return count;
     }
-
-    public static String idToPath(String id) throws UnsupportedEncodingException {
-        return URLDecoder.decode(id, "UTF-8");
-    }
 }

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java Thu May 21 08:56:02 2020
@@ -16,6 +16,7 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
 
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
 import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
 import org.apache.jackrabbit.oak.spi.query.QueryIndex;
@@ -49,7 +50,8 @@ public interface ElasticsearchFacets {
      * @return A map with facetName as key and List of facets in descending order of facetCount.
      * @throws IOException
      */
-    Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException;
+    Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition,
+                                                                  int numberOfFacets) throws IOException;
 
     /**
      * We can retrieve Aggregation in a single call to elastic search while querying. Which can then be passed

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java Thu May 21 08:56:02 2020
@@ -16,6 +16,7 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
 
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
 import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel;
 import org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchAggregationBuilderUtil;
@@ -51,12 +52,13 @@ public class InsecureElasticSearchFacets
     }
 
     @Override
-    public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException {
+    public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition,
+                                                                         int numberOfFacets) throws IOException {
         if (elasticsearchAggregationData != null && numberOfFacets <= elasticsearchAggregationData.getNumberOfFacets()) {
             return changeToFacetList(elasticsearchAggregationData.getAggregations().getAsMap(), numberOfFacets);
         }
         LOG.warn("Facet data is being retrieved by again calling Elasticsearch");
-        List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil.getAggregators(plan, numberOfFacets);
+        List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil.getAggregators(plan, indexDefinition, numberOfFacets);
         ElasticsearchSearcherModel elasticsearchSearcherModel = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
                 .withQuery(query)
                 .withAggregation(aggregationBuilders)

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java Thu May 21 08:56:02 2020
@@ -16,6 +16,7 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
 
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
 import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel;
 import org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchAggregationBuilderUtil;
@@ -50,7 +51,8 @@ public class SecureElasticSearchFacets e
     for docs.
      */
     @Override
-    public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException {
+    public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition,
+                                                                         int numberOfFacets) throws IOException {
         Map<String, Map<String, Long>> secureFacetCount = new HashMap<>();
         Filter filter = getPlan().getFilter();
         boolean doFetch = true;
@@ -70,7 +72,8 @@ public class SecureElasticSearchFacets e
             List<String> accessibleDocs = ElasticFacetHelper.getAccessibleDocIds(searchHits, filter);
             if (accessibleDocs.isEmpty()) continue;
             QueryBuilder queryWithAccessibleDocIds = QueryBuilders.termsQuery("_id", accessibleDocs);
-            Map<String, Aggregation> accessibleDocsAggregation = getAggregationForDocIds(queryWithAccessibleDocIds, accessibleDocs.size());
+            Map<String, Aggregation> accessibleDocsAggregation = getAggregationForDocIds(queryWithAccessibleDocIds,
+                    accessibleDocs.size(), indexDefinition);
             collateAggregations(secureFacetCount, accessibleDocsAggregation);
         }
 
@@ -115,15 +118,15 @@ public class SecureElasticSearchFacets e
         }
     }
 
-    private Map<String, Aggregation> getAggregationForDocIds(QueryBuilder queryWithAccessibleDocIds, int facetCount) throws IOException {
-        List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil.getAggregators(getPlan(), facetCount);
+    private Map<String, Aggregation> getAggregationForDocIds(QueryBuilder queryWithAccessibleDocIds, int facetCount,
+                                                             ElasticsearchIndexDefinition indexDefinition) throws IOException {
+        List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil.getAggregators(getPlan(), indexDefinition, facetCount);
         ElasticsearchSearcherModel idBasedelasticsearchSearcherModelWithAggregation = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
                 .withQuery(queryWithAccessibleDocIds)
                 .withAggregation(aggregationBuilders)
                 .build();
 
         SearchResponse facetDocs = getSearcher().search(idBasedelasticsearchSearcherModelWithAggregation);
-        Map<String, Aggregation> aggregationMap = facetDocs.getAggregations().asMap();
-        return aggregationMap;
+        return facetDocs.getAggregations().asMap();
     }
 }

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java Thu May 21 08:56:02 2020
@@ -17,6 +17,7 @@
 package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
 
 import com.google.common.collect.AbstractIterator;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
 import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel;
 import org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchConstants;
@@ -52,7 +53,9 @@ public class StatisticalElasticSearchFac
         this.secureFacetConfiguration = secureFacetConfiguration;
     }
 
-    public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException {
+    @Override
+    public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition,
+                                                                         int numberOfFacets) throws IOException {
         Map<String, List<FulltextIndex.Facet>> result = new HashMap<>();
         Map<String, List<FulltextIndex.Facet>> topChildren;
         Filter filter = getPlan().getFilter();
@@ -61,7 +64,7 @@ public class StatisticalElasticSearchFac
         ElasticsearchAggregationData aggregationData = getElasticsearchAggregationData();
         if (aggregationData == null || aggregationData.getNumberOfFacets() < numberOfFacets) {
             LOG.warn("Facets and Totalhit count are being retrieved by calling Elasticsearch");
-            topChildren = super.getElasticSearchFacets(numberOfFacets);
+            topChildren = super.getElasticSearchFacets(indexDefinition, numberOfFacets);
             ElasticsearchSearcherModel elasticsearchSearcherModel = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
                     .withQuery(getQuery())
                     .withBatchSize(ElasticsearchConstants.ELASTICSEARCH_QUERY_BATCH_SIZE)
@@ -79,7 +82,8 @@ public class StatisticalElasticSearchFac
         // instead of statistical count. <OAK-8138>
         if (hitCount < sampleSize) {
             LOG.debug("SampleSize: {} is greater than hitcount: {}, Getting secure facet count", sampleSize, hitCount);
-            return new SecureElasticSearchFacets(getSearcher(), getQuery(), getPlan()).getElasticSearchFacets(numberOfFacets);
+            return new SecureElasticSearchFacets(getSearcher(), getQuery(), getPlan()).getElasticSearchFacets(indexDefinition,
+                    numberOfFacets);
         }
         long randomSeed = secureFacetConfiguration.getRandomSeed();
         Iterator<SearchHit> docIterator = getMatchingDocIterator(getSearcher(), getQuery());

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java Thu May 21 08:56:02 2020
@@ -25,8 +25,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -35,10 +33,7 @@ import java.util.Map;
 class ElasticsearchDocument {
     private static final Logger LOG = LoggerFactory.getLogger(ElasticsearchDocument.class);
 
-    // id should only be useful for logging (at least as of now)
     private final String path;
-
-    private final String id;
     private final List<String> fulltext;
     private final List<String> suggest;
     private final List<String> notNullProps;
@@ -47,13 +42,6 @@ class ElasticsearchDocument {
 
     ElasticsearchDocument(String path) {
         this.path = path;
-        String id = null;
-        try {
-            id = pathToId(path);
-        } catch (UnsupportedEncodingException e) {
-            LOG.warn("Couldn't encode {} as ES id", path);
-        }
-        this.id = id;
         this.fulltext = new ArrayList<>();
         this.suggest = new ArrayList<>();
         this.notNullProps = new ArrayList<>();
@@ -93,24 +81,20 @@ class ElasticsearchDocument {
         String parPath = PathUtils.getParentPath(path);
         int depth = PathUtils.getDepth(path);
 
-        // TODO: remember that mapping must be configured with
-        // https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pathhierarchy-tokenizer.html
         addProperty(FieldNames.ANCESTORS, parPath);
         addProperty(FieldNames.PATH_DEPTH, depth);
     }
 
-    String getId() {
-        return id;
-    }
-
     public String build() {
-        String ret = null;
+        String ret;
         try {
             XContentBuilder builder = XContentFactory.jsonBuilder();
             builder.startObject();
-            if (fulltext.size() > 0) {
-                builder.field(FieldNames.FULLTEXT, fulltext);
-            }
+            {
+                builder.field(FieldNames.PATH, path);
+                if (fulltext.size() > 0) {
+                    builder.field(FieldNames.FULLTEXT, fulltext);
+                }
             if (suggest.size() > 0) {
                 builder.startObject(FieldNames.SUGGEST).field("input", suggest).endObject();
             }
@@ -120,17 +104,18 @@ class ElasticsearchDocument {
             if (nullProps.size() > 0) {
                 builder.field(FieldNames.NULL_PROPS, nullProps);
             }
-            for (Map.Entry<String, Object> prop : properties.entrySet()) {
-                builder.field(prop.getKey(), prop.getValue());
+                for (Map.Entry<String, Object> prop : properties.entrySet()) {
+                    builder.field(prop.getKey(), prop.getValue());
+                }
             }
             builder.endObject();
 
             ret = Strings.toString(builder);
         } catch (IOException e) {
-            LOG.error("Error serializing document - id: {}, properties: {}, fulltext: {}, suggest: {}, " +
+            LOG.error("Error serializing document - path: {}, properties: {}, fulltext: {}, suggest: {}, " +
                             "notNullProps: {}, nullProps: {}",
-                    path, properties, fulltext, suggest, notNullProps, nullProps,
-                    e);
+                    path, properties, fulltext, suggest, notNullProps, nullProps, e);
+            ret = null;
         }
 
         return ret;
@@ -140,8 +125,4 @@ class ElasticsearchDocument {
     public String toString() {
         return build();
     }
-
-    public static String pathToId(String path) throws UnsupportedEncodingException {
-        return URLEncoder.encode(path, "UTF-8");
-    }
 }

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java Thu May 21 08:56:02 2020
@@ -48,10 +48,9 @@ class ElasticsearchDocumentMaker extends
     }
 
     @Override
-    protected ElasticsearchDocument finalizeDoc(ElasticsearchDocument doc, boolean dirty, boolean facet) throws IOException {
-        if (doc.getId() == null) {
-            throw new IOException("Couldn't generate id for doc - (More details during initDoc)" + doc);
-        }
+    protected ElasticsearchDocument finalizeDoc(ElasticsearchDocument doc, boolean dirty, boolean facet) {
+        // evaluating path restrictions is enabled by default in elastic, so ancestors are always indexed
+        doc.indexAncestors(path);
         return doc;
     }
 
@@ -134,10 +133,13 @@ class ElasticsearchDocumentMaker extends
         doc.addProperty(pname, f);
     }
 
+    /**
+     * Empty method implementation. Ancestors are always indexed
+     *
+     * @see ElasticsearchDocumentMaker#finalizeDoc
+     */
     @Override
-    protected void indexAncestors(ElasticsearchDocument doc, String path) {
-        doc.indexAncestors(path);
-    }
+    protected void indexAncestors(ElasticsearchDocument doc, String path) { /* empty */ }
 
     @Override
     protected void indexNotNullProperty(ElasticsearchDocument doc, PropertyDefinition pd) {

Added: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java?rev=1877992&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java Thu May 21 08:56:02 2020
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch.index;
+
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
+import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
+import org.elasticsearch.client.indices.CreateIndexRequest;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentFactory;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Static helpers that translate an {@link ElasticsearchIndexDefinition} into the Elasticsearch request used to
+ * create the remote index (analysis settings plus field mappings).
+ */
+class ElasticsearchIndexHelper {
+
+    /**
+     * Builds the create-index request for the given index definition. The request carries the settings for the
+     * analyzer used by the ancestors field and a mapping made of the internal fields followed by one field for
+     * each property name found in the index definition.
+     *
+     * @param indexDefinition the definition the remote index is created from
+     * @return the request, ready to be sent to Elasticsearch
+     * @throws IOException if the mapping cannot be written
+     * @throws IllegalStateException if a property name is defined with more than one type
+     */
+    public static CreateIndexRequest createIndexRequest(ElasticsearchIndexDefinition indexDefinition) throws IOException {
+        final CreateIndexRequest createRequest = new CreateIndexRequest(indexDefinition.getRemoteIndexName());
+
+        // settings: custom analyzer based on the path_hierarchy tokenizer, see
+        // https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pathhierarchy-tokenizer.html
+        final Settings.Builder analysisSettings = Settings.builder()
+                .put("analysis.analyzer.ancestor_analyzer.type", "custom")
+                .put("analysis.analyzer.ancestor_analyzer.tokenizer", "path_hierarchy");
+        createRequest.settings(analysisSettings);
+
+        // mappings: { "properties": { <internal fields>, <fields coming from the index rules> } }
+        final XContentBuilder mappings = XContentFactory.jsonBuilder().startObject().startObject("properties");
+        mapInternalProperties(mappings);
+        mapIndexRules(indexDefinition, mappings);
+        mappings.endObject().endObject();
+        createRequest.mapping(mappings);
+
+        return createRequest;
+    }
+
+    /**
+     * Writes the mapping of the fields that do not come from the index rules: path, ancestors and path depth.
+     *
+     * @param mappingBuilder the builder, positioned inside the "properties" object
+     * @throws IOException if the mapping cannot be written
+     */
+    private static void mapInternalProperties(XContentBuilder mappingBuilder) throws IOException {
+        mappingBuilder.startObject(FieldNames.PATH);
+        mappingBuilder.field("type", "keyword");
+        mappingBuilder.endObject();
+
+        // indexed with the ancestor analyzer, searched with the keyword analyzer
+        mappingBuilder.startObject(FieldNames.ANCESTORS);
+        mappingBuilder.field("type", "text");
+        mappingBuilder.field("analyzer", "ancestor_analyzer");
+        mappingBuilder.field("search_analyzer", "keyword");
+        mappingBuilder.field("search_quote_analyzer", "keyword");
+        mappingBuilder.endObject();
+
+        mappingBuilder.startObject(FieldNames.PATH_DEPTH);
+        mappingBuilder.field("type", "integer");
+        mappingBuilder.endObject();
+
+        // TODO: FieldNames.SUGGEST (type "completion"), FieldNames.NOT_NULL_PROPS and FieldNames.NULL_PROPS
+        // (both type "keyword") are intentionally not mapped: the related features are currently not supported.
+        // These mappings need to be reviewed when the specific features will be implemented.
+    }
+
+    /**
+     * Writes one mapping entry for each property name of the index definition, after checking that no name is
+     * defined with different types.
+     *
+     * @param indexDefinition the definition providing the properties grouped by name
+     * @param mappingBuilder the builder, positioned inside the "properties" object
+     * @throws IOException if the mapping cannot be written
+     * @throws IllegalStateException if a property name is defined with more than one type
+     */
+    private static void mapIndexRules(ElasticsearchIndexDefinition indexDefinition, XContentBuilder mappingBuilder) throws IOException {
+        final Map<String, List<PropertyDefinition>> propertiesByName = indexDefinition.getPropertiesByName();
+
+        // a field can have a single type: refuse definitions where the same name is used with different types
+        final List<String> conflictingNames = propertiesByName.entrySet()
+                .stream()
+                .filter(e -> hasMultipleTypes(e.getValue()))
+                .map(Map.Entry::getKey)
+                .collect(Collectors.toList());
+        if (!conflictingNames.isEmpty()) {
+            throw new IllegalStateException(indexDefinition.getIndexPath() + " has properties with the same name and " +
+                    "different types " + "[" + String.join(", ", conflictingNames) + "]");
+        }
+
+        for (Map.Entry<String, List<PropertyDefinition>> property : propertiesByName.entrySet()) {
+            final List<PropertyDefinition> definitions = property.getValue();
+            // all the definitions share the same type (checked above): the first one is representative
+            final String elasticType = elasticTypeOf(Type.fromTag(definitions.get(0).getType(), false));
+
+            mappingBuilder.startObject(property.getKey());
+            if (elasticType != null) {
+                mappingBuilder.field("type", elasticType);
+            } else if (indexDefinition.isAnalyzed(definitions)) {
+                // analyzed text: always add a keyword sub-field for sorting / faceting
+                mappingBuilder.field("type", "text")
+                        .startObject("fields")
+                        .startObject("keyword").field("type", "keyword").endObject()
+                        .endObject();
+            } else {
+                // not analyzed: plain keyword, usable for sorting / faceting
+                mappingBuilder.field("type", "keyword");
+            }
+            mappingBuilder.endObject();
+        }
+    }
+
+    /**
+     * Tells whether the given definitions (all for the same property name) declare more than one distinct type.
+     */
+    private static boolean hasMultipleTypes(List<PropertyDefinition> definitions) {
+        return definitions.size() > 1
+                && definitions.stream().map(PropertyDefinition::getType).distinct().count() > 1;
+    }
+
+    /**
+     * Returns the Elasticsearch field type used for the given Oak type, or {@code null} when the type has no
+     * dedicated mapping and the field has to be mapped as text / keyword.
+     *
+     * @see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html">
+     *     Field data types</a>
+     */
+    private static String elasticTypeOf(Type<?> type) {
+        if (Type.BINARY.equals(type)) {
+            return "binary";
+        }
+        if (Type.LONG.equals(type)) {
+            return "long";
+        }
+        if (Type.DOUBLE.equals(type) || Type.DECIMAL.equals(type)) {
+            return "double";
+        }
+        if (Type.DATE.equals(type)) {
+            return "date";
+        }
+        if (Type.BOOLEAN.equals(type)) {
+            return "boolean";
+        }
+        return null;
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java Thu May 21 08:56:02 2020
@@ -18,8 +18,8 @@ package org.apache.jackrabbit.oak.plugin
 
 import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchConnection;
 import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
-import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
 import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexWriter;
+import org.elasticsearch.ElasticsearchStatusException;
 import org.elasticsearch.action.DocWriteRequest;
 import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest;
 import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest;
@@ -37,13 +37,11 @@ import org.elasticsearch.client.IndicesC
 import org.elasticsearch.client.RequestOptions;
 import org.elasticsearch.client.indices.CreateIndexRequest;
 import org.elasticsearch.client.indices.CreateIndexResponse;
+import org.elasticsearch.client.indices.GetIndexRequest;
 import org.elasticsearch.cluster.metadata.AliasMetaData;
 import org.elasticsearch.common.Strings;
-import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.unit.TimeValue;
-import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.TestOnly;
@@ -51,12 +49,17 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.Phaser;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.Map;
-import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
 
-import static org.apache.jackrabbit.oak.plugins.index.elasticsearch.index.ElasticsearchDocument.pathToId;
 import static org.elasticsearch.common.xcontent.ToXContent.EMPTY_PARAMS;
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
 
@@ -66,8 +69,19 @@ class ElasticsearchIndexWriter implement
     private final ElasticsearchConnection elasticsearchConnection;
     private final ElasticsearchIndexDefinition indexDefinition;
 
+    /**
+     * Coordinates communication between bulk processes. It has a main controller registered at creation time and
+     * de-registered on {@link ElasticsearchIndexWriter#close(long)}. Each bulk request registers a new party in
+     * this Phaser in {@link OakBulkProcessorListener#beforeBulk(long, BulkRequest)} and de-registers itself when
+     * the request returns.
+     */
+    private final Phaser phaser = new Phaser(1); // register main controller
+    /**
+     * Key-value structure to keep the history of bulk requests. Keys are the bulk execution ids, the boolean
+     * value is {@code true} when at least one update is performed, otherwise {@code false}.
+     */
+    private final ConcurrentHashMap<Long, Boolean> updatesMap = new ConcurrentHashMap<>();
     private final BulkProcessor bulkProcessor;
-    private Optional<Boolean> indexUpdated = Optional.empty();
 
     ElasticsearchIndexWriter(@NotNull ElasticsearchConnection elasticsearchConnection,
                                        @NotNull ElasticsearchIndexDefinition indexDefinition) {
@@ -99,70 +113,76 @@ class ElasticsearchIndexWriter implement
     }
 
     @Override
-    public void updateDocument(String path, ElasticsearchDocument doc) throws IOException {
+    public void updateDocument(String path, ElasticsearchDocument doc) {
         IndexRequest request = new IndexRequest(indexDefinition.getRemoteIndexAlias())
-                .id(pathToId(path))
+                .id(idFromPath(path))
                 .source(doc.build(), XContentType.JSON);
         bulkProcessor.add(request);
     }
 
     @Override
-    public void deleteDocuments(String path) throws IOException {
+    public void deleteDocuments(String path) {
         DeleteRequest request = new DeleteRequest(indexDefinition.getRemoteIndexAlias())
-                .id(pathToId(path));
+                .id(idFromPath(path));
         bulkProcessor.add(request);
     }
 
     @Override
-    public boolean close(long timestamp) throws IOException {
+    public boolean close(long timestamp) {
+        // Closes the bulk processor, then waits (bounded) for the registered bulk requests before reporting
+        // whether at least one of them updated the index.
         LOG.trace("Calling close on bulk processor {}", bulkProcessor);
         bulkProcessor.close();
         LOG.trace("Bulk Processor {} closed", bulkProcessor);
 
-        // bulkProcessor.close() calls the OakBulkProcessorListener.beforeBulk in a blocking manner
-        // indexUpdated would be unset there if it was false till now (not even a single update succeeded)
-        // in this case wait for sometime for the last OakBulkProcessorListener.afterBulk to be called
-        // where indexUpdated can possibly be set to true, return false in case of timeout.
-        // We don't wait in case indexUpdated is already set (This would be if any of the previous flushes for this processor
-        // were successful i.e index was updated at least once)
-        final long start = System.currentTimeMillis();
-        long timeoutMillis = indexDefinition.bulkFlushIntervalMs * 5 ;
-        while (!indexUpdated.isPresent()) {
-            long lastAttempt = System.currentTimeMillis();
-            long elapsedTime = lastAttempt - start;
-            if (elapsedTime > timeoutMillis) {
-                // indexUpdate was not set till now, return false
-                LOG.trace("Timed out waiting for the bulk processor response. Returning indexUpdated = false");
-                return false;
-            } else {
-                try {
-                    LOG.trace("Waiting for afterBulk response...");
-                    Thread.sleep(100);
-                } catch (InterruptedException ex) {
-                    //
-                }
-            }
+        // de-register main controller
+        final int phase = phaser.arriveAndDeregister();
+
+        try {
+            // wait at most 5 flush intervals for the parties registered by the bulk requests to arrive
+            phaser.awaitAdvanceInterruptibly(phase, indexDefinition.bulkFlushIntervalMs * 5, TimeUnit.MILLISECONDS);
+        } catch (InterruptedException e) {
+            // restore the interrupt status: swallowing it would hide the interruption from the calling thread
+            Thread.currentThread().interrupt();
+            LOG.error("Error waiting for bulk requests to return", e);
+        } catch (TimeoutException e) {
+            LOG.error("Error waiting for bulk requests to return", e);
+        }
+
+        if (LOG.isTraceEnabled()) {
+            LOG.trace("Bulk identifier -> update status = {}", updatesMap);
         }
-        LOG.trace("Returning indexUpdated = {}", indexUpdated.get());
-        return indexUpdated.get();
+        return updatesMap.containsValue(Boolean.TRUE);
     }
 
-    // TODO: we need to check if the index already exists and in that case we have to figure out if there are
-    // "breaking changes" in the index definition
     protected void provisionIndex() throws IOException {
+        // check if index already exists
+        boolean exists = elasticsearchConnection.getClient().indices().exists(
+                new GetIndexRequest(indexDefinition.getRemoteIndexName()), RequestOptions.DEFAULT
+        );
+        if (exists) {
+            LOG.info("Index {} already exists. Skip index provision", indexDefinition.getRemoteIndexName());
+            return;
+        }
 
-        IndicesClient indicesClient = elasticsearchConnection.getClient().indices();
+        final IndicesClient indicesClient = elasticsearchConnection.getClient().indices();
         final String indexName = indexDefinition.getRemoteIndexName();
 
-        CreateIndexRequest createIndexRequest = constructCreateIndexRequest(indexName);
-        String requestMsg = Strings.toString(createIndexRequest.toXContent(jsonBuilder(), EMPTY_PARAMS));
-        CreateIndexResponse response = indicesClient.create(createIndexRequest, RequestOptions.DEFAULT);
-        checkResponseAcknowledgement(response, "Create index call not acknowledged for index " + indexName);
-
-        LOG.info("Updated settings for index {} = {}. Response acknowledged: {}",
-                indexDefinition.getRemoteIndexAlias(), requestMsg, response.isAcknowledged());
-
+        // create the new index
+        final CreateIndexRequest request = ElasticsearchIndexHelper.createIndexRequest(indexDefinition);
+        try {
+            if (LOG.isDebugEnabled()) {
+                final String requestMsg = Strings.toString(request.toXContent(jsonBuilder(), EMPTY_PARAMS));
+                LOG.debug("Creating Index with request {}", requestMsg);
+            }
+            CreateIndexResponse response = indicesClient.create(request, RequestOptions.DEFAULT);
+            LOG.info("Updated settings for index {}. Response acknowledged: {}",
+                    indexDefinition.getRemoteIndexName(), response.isAcknowledged());
+            checkResponseAcknowledgement(response, "Create index call not acknowledged for index " + indexName);
+        } catch (ElasticsearchStatusException ese) {
+            // We already check index existence as first thing in this method, if we get here it means we have got into
+            // a conflict (eg: multiple cluster nodes provision concurrently).
+            // Elasticsearch does not have a CREATE IF NOT EXIST, need to inspect exception
+            // https://github.com/elastic/elasticsearch/issues/19862
+            if (ese.status().getStatus() == 400 && ese.getDetailedMessage().contains("resource_already_exists_exception")) {
+                LOG.warn("Index {} already exists. Ignoring error", indexName);
+            } else throw ese;
+        }
 
+        // update the alias so that it points to the index provisioned above
         GetAliasesRequest getAliasesRequest = new GetAliasesRequest(indexDefinition.getRemoteIndexAlias());
         GetAliasesResponse aliasesResponse = indicesClient.getAlias(getAliasesRequest, RequestOptions.DEFAULT);
         Map<String, Set<AliasMetaData>> aliases = aliasesResponse.getAliases();
@@ -180,6 +200,8 @@ class ElasticsearchIndexWriter implement
                 + indexDefinition.getRemoteIndexAlias());
         LOG.info("Updated alias {} to index {}. Response acknowledged: {}", indexDefinition.getRemoteIndexAlias(),
                 indexName, updateAliasResponse.isAcknowledged());
+
+        // once the alias has been updated, we can safely remove the old index
         deleteOldIndices(indicesClient, aliases.keySet());
     }
 
@@ -201,57 +223,15 @@ class ElasticsearchIndexWriter implement
         LOG.info("Deleted indices {}. Response acknowledged: {}", indices.toString(), deleteIndexResponse.isAcknowledged());
     }
 
-    private CreateIndexRequest constructCreateIndexRequest(String indexName) throws IOException {
-        CreateIndexRequest request = new CreateIndexRequest(indexName);
-
-        // provision settings
-        request.settings(Settings.builder()
-                .put("analysis.analyzer.ancestor_analyzer.type", "custom")
-                .put("analysis.analyzer.ancestor_analyzer.tokenizer", "path_hierarchy"));
-
-        // provision mappings
-        XContentBuilder mappingBuilder = XContentFactory.jsonBuilder();
-        mappingBuilder.startObject();
-        {
-            mappingBuilder.startObject("properties");
-            {
-                mappingBuilder.startObject(FieldNames.ANCESTORS)
-                        .field("type", "text")
-                        .field("analyzer", "ancestor_analyzer")
-                        .field("search_analyzer", "keyword")
-                        .field("search_quote_analyzer", "keyword")
-                        .endObject();
-                mappingBuilder.startObject(FieldNames.PATH_DEPTH)
-                        .field("type", "integer")
-                        .endObject();
-                mappingBuilder.startObject(FieldNames.SUGGEST)
-                        .field("type", "completion")
-                        .endObject();
-                mappingBuilder.startObject(FieldNames.NOT_NULL_PROPS)
-                        .field("type", "keyword")
-                        .endObject();
-                mappingBuilder.startObject(FieldNames.NULL_PROPS)
-                        .field("type", "keyword")
-                        .endObject();
-            }
-            mappingBuilder.endObject();
-        }
-        mappingBuilder.endObject();
-        request.mapping(mappingBuilder);
-
-        return request;
-    }
-
     private class OakBulkProcessorListener implements BulkProcessor.Listener {
 
         @Override
         public void beforeBulk(long executionId, BulkRequest bulkRequest) {
-            if (indexUpdated.isPresent() && !indexUpdated.get()) {
-                // Reset the state only if it's false
-                // If it's true that means index was updated at least once by this processor
-                // and we can return true for indexUpdate.
-                indexUpdated = Optional.empty();
-            }
+            // register new bulk party
+            phaser.register();
+            // init update status
+            updatesMap.put(executionId, Boolean.FALSE);
+
             LOG.info("Sending bulk with id {} -> {}", executionId, bulkRequest.getDescription());
             if (LOG.isTraceEnabled()) {
                 LOG.trace("Bulk Requests: \n{}", bulkRequest.requests()
@@ -279,29 +259,41 @@ class ElasticsearchIndexWriter implement
                         LOG.error("Bulk item with id {} failed", failure.getId(), failure.getCause());
                     } else {
                         // Set indexUpdated to true even if 1 item was updated successfully
-                        indexUpdated = Optional.of(true);
+                        updatesMap.put(executionId, Boolean.TRUE);
                     }
                 }
-                // Only set indexUpdated to false if it's unset
-                // If set and true, that means index was updated at least once by this processor.
-                // If set and false, no need to do anything
-                if (!indexUpdated.isPresent()) {
-                    indexUpdated = Optional.of(false);
-                }
             } else {
-                indexUpdated = Optional.of(true);
+                updatesMap.put(executionId, Boolean.TRUE);
             }
+            phaser.arriveAndDeregister();
         }
 
         @Override
         public void afterBulk(long executionId, BulkRequest bulkRequest, Throwable throwable) {
-            // Only set indexUpdated to false if it's unset
-            // If set and true, that means index was updated at least once by this processor.
-            // If set and false, no need to do anything
-            if (!indexUpdated.isPresent()) {
-                indexUpdated = Optional.of(false);
-            }
             LOG.error("Bulk with id {} threw an error", executionId, throwable);
+            phaser.arriveAndDeregister();
+        }
+    }
+
+    /**
+     * Transforms a path into an _id compatible with the Elasticsearch specification: the _id cannot be larger than
+     * 512 bytes. For performance reasons paths that already fit are returned untouched. Longer paths are replaced
+     * by the URL-safe Base64 encoding (no padding) of their SHA-256 digest: 43 ASCII characters that do not depend
+     * on the platform default charset and never contain a '/'.
+     *
+     * @param path the document path
+     * @return the Elasticsearch compatible _id
+     * @throws IllegalStateException if the SHA-256 algorithm is not available
+     * @see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-id-field.html">
+     *     Mapping _id field</a>
+     */
+    private static String idFromPath(@NotNull String path) {
+        byte[] pathBytes = path.getBytes(StandardCharsets.UTF_8);
+        if (pathBytes.length > 512) {
+            try {
+                // the raw digest is arbitrary binary data: decoding it as text is lossy and charset dependent,
+                // encode it instead so that every JVM computes the same _id for the same path
+                byte[] digest = MessageDigest.getInstance("SHA-256").digest(pathBytes);
+                return java.util.Base64.getUrlEncoder().withoutPadding().encodeToString(digest);
+            } catch (NoSuchAlgorithmException e) {
+                throw new IllegalStateException(e);
+            }
         }
+        return path;
     }
 }

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java Thu May 21 08:56:02 2020
@@ -90,7 +90,9 @@ class ElasticsearchIndex extends Fulltex
 
     @Override
     protected String getFulltextRequestString(IndexPlan plan, IndexNode indexNode) {
-        return Strings.toString(ElasticsearchResultRowIterator.getESQuery(plan, getPlanResult(plan)));
+        return Strings.toString(new ElasticsearchResultRowIterator(plan.getFilter(), getPlanResult(plan), plan,
+                acquireIndexNode(plan), FulltextIndex::shouldInclude, getEstimator(plan.getPlanName()))
+                .getESQuery(plan, getPlanResult(plan)));
     }
 
     @Override

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java Thu May 21 08:56:02 2020
@@ -53,8 +53,6 @@ import org.slf4j.LoggerFactory;
 
 import javax.jcr.PropertyType;
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.URLDecoder;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Deque;
@@ -68,7 +66,6 @@ import java.util.stream.StreamSupport;
 
 import static org.apache.jackrabbit.JcrConstants.JCR_MIXINTYPES;
 import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE;
-import static org.apache.jackrabbit.oak.api.Type.STRING;
 import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot;
 import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath;
 import static org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.TermQueryBuilderFactory.newAncestorQuery;
@@ -158,9 +155,10 @@ class ElasticsearchResultRowIterator imp
         try {
             ElasticsearchSearcher searcher = getCurrentSearcher(indexNode);
             QueryBuilder query = getESQuery(plan, planResult);
-            int numberOfFacets = indexNode.getDefinition().getNumberOfTopFacets();
+            ElasticsearchIndexDefinition indexDefinition = indexNode.getDefinition();
+            int numberOfFacets = indexDefinition.getNumberOfTopFacets();
             List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil
-                    .getAggregators(plan, numberOfFacets);
+                    .getAggregators(plan, indexDefinition, numberOfFacets);
 
             ElasticsearchSearcherModel elasticsearchSearcherModel = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
                     .withQuery(query)
@@ -236,10 +234,9 @@ class ElasticsearchResultRowIterator imp
         return new ElasticsearchSearcher(indexNode);
     }
 
-    private FulltextIndex.FulltextResultRow convertToRow(SearchHit hit,
-                                                         ElasticsearchFacetProvider elasticsearchFacetProvider) throws IOException {
-        String id = hit.getId();
-        String path = idToPath(id);
+    private FulltextIndex.FulltextResultRow convertToRow(SearchHit hit, ElasticsearchFacetProvider elasticsearchFacetProvider) {
+        final Map<String, Object> sourceMap = hit.getSourceAsMap();
+        String path = (String) sourceMap.get(FieldNames.PATH);
         if (path != null) {
             if ("".equals(path)) {
                 path = "/";
@@ -276,7 +273,7 @@ class ElasticsearchResultRowIterator imp
      * @param planResult
      * @return the Lucene query
      */
-    static QueryBuilder getESQuery(IndexPlan plan, PlanResult planResult) {
+    public QueryBuilder getESQuery(IndexPlan plan, PlanResult planResult) {
         List<QueryBuilder> qs = new ArrayList<>();
         Filter filter = plan.getFilter();
         FullTextExpression ft = filter.getFullTextConstraint();
@@ -504,13 +501,12 @@ class ElasticsearchResultRowIterator imp
         return unwrapped;
     }
 
-    private static void addNonFullTextConstraints(List<QueryBuilder> qs,
+    private void addNonFullTextConstraints(List<QueryBuilder> qs,
                                                   IndexPlan plan, PlanResult planResult) {
         final BiPredicate<Iterable<String>, String> any = (iterable, value) ->
                 StreamSupport.stream(iterable.spliterator(), false).anyMatch(value::equals);
 
         Filter filter = plan.getFilter();
-        IndexDefinition defn = planResult.indexDefinition;
         if (!filter.matchesAllTypes()) {
             addNodeTypeConstraints(planResult.indexingRule, qs, filter);
         }
@@ -518,20 +514,15 @@ class ElasticsearchResultRowIterator imp
         String path = FulltextIndex.getPathRestriction(plan);
         switch (filter.getPathRestriction()) {
             case ALL_CHILDREN:
-                if (defn.evaluatePathRestrictions()) {
-                    if ("/".equals(path)) {
-                        break;
-                    }
+                if (!"/".equals(path)) {
                     qs.add(newAncestorQuery(path));
                 }
                 break;
             case DIRECT_CHILDREN:
-                if (defn.evaluatePathRestrictions()) {
-                    BoolQueryBuilder bq = boolQuery();
-                    bq.must(newAncestorQuery(path));
-                    bq.must(newDepthQuery(path, planResult));
-                    qs.add(bq);
-                }
+                BoolQueryBuilder bq = boolQuery();
+                bq.must(newAncestorQuery(path));
+                bq.must(newDepthQuery(path, planResult));
+                qs.add(bq);
                 break;
             case EXACT:
                 // For transformed paths, we can only add path restriction if absolute path to property can be
@@ -588,7 +579,7 @@ class ElasticsearchResultRowIterator imp
 
             if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding
                     && pr.lastIncluding) {
-                String first = pr.first.getValue(STRING);
+                String first = pr.first.getValue(Type.STRING);
                 first = first.replace("\\", "");
                 if (JCR_PATH.equals(name)) {
                     qs.add(newPathQuery(first));
@@ -637,7 +628,7 @@ class ElasticsearchResultRowIterator imp
     }
 
     private static QueryBuilder createNodeNameQuery(Filter.PropertyRestriction pr) {
-        String first = pr.first != null ? pr.first.getValue(STRING) : null;
+        String first = pr.first != null ? pr.first.getValue(Type.STRING) : null;
         if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding
                 && pr.lastIncluding) {
             // [property]=[value]
@@ -685,7 +676,7 @@ class ElasticsearchResultRowIterator imp
     }
 
     @Nullable
-    private static QueryBuilder createQuery(String propertyName, Filter.PropertyRestriction pr,
+    private QueryBuilder createQuery(String propertyName, Filter.PropertyRestriction pr,
                                             PropertyDefinition defn) {
         int propType = FulltextIndex.determinePropertyType(defn, pr);
 
@@ -699,31 +690,29 @@ class ElasticsearchResultRowIterator imp
             return newNotNullPropQuery(defn.name);
         }
 
+        final String field = indexNode.getDefinition().getElasticKeyword(propertyName);
+
         QueryBuilder in;
         switch (propType) {
             case PropertyType.DATE: {
-                in = newPropertyRestrictionQuery(propertyName, false, pr,
-                        value -> parse(value.getValue(Type.DATE)).getTime());
+                in = newPropertyRestrictionQuery(field, pr, value -> parse(value.getValue(Type.DATE)).getTime());
                 break;
             }
             case PropertyType.DOUBLE: {
-                in = newPropertyRestrictionQuery(propertyName, false, pr,
-                        value -> value.getValue(Type.DOUBLE));
+                in = newPropertyRestrictionQuery(field, pr, value -> value.getValue(Type.DOUBLE));
                 break;
             }
             case PropertyType.LONG: {
-                in = newPropertyRestrictionQuery(propertyName, false, pr,
-                        value -> value.getValue(Type.LONG));
+                in = newPropertyRestrictionQuery(field, pr, value -> value.getValue(Type.LONG));
                 break;
             }
             default: {
                 if (pr.isLike) {
-                    return createLikeQuery(propertyName, pr.first.getValue(STRING));
+                    return createLikeQuery(propertyName, pr.first.getValue(Type.STRING));
                 }
 
                 //TODO Confirm that all other types can be treated as string
-                in = newPropertyRestrictionQuery(propertyName, true, pr,
-                        value -> value.getValue(Type.STRING));
+                in = newPropertyRestrictionQuery(field, pr, value -> value.getValue(Type.STRING));
             }
         }
 
@@ -734,10 +723,6 @@ class ElasticsearchResultRowIterator imp
         throw new IllegalStateException("PropertyRestriction not handled " + pr + " for index " + defn);
     }
 
-    private static String idToPath(String id) throws UnsupportedEncodingException {
-        return URLDecoder.decode(id, "UTF-8");
-    }
-
     class ElasticsearchFacetProvider implements FulltextIndex.FacetProvider {
         private ElasticsearchFacets elasticsearchFacets;
         private Map<String, List<FulltextIndex.Facet>> cachedResults = new HashMap<>();
@@ -750,7 +735,7 @@ class ElasticsearchResultRowIterator imp
         public List<FulltextIndex.Facet> getFacets(int numberOfFacets, String columnName) throws IOException {
             String facetProp = FulltextIndex.parseFacetField(columnName);
             if (cachedResults.get(facetProp) == null) {
-                cachedResults = elasticsearchFacets.getElasticSearchFacets(numberOfFacets);
+                cachedResults = elasticsearchFacets.getElasticSearchFacets(indexNode.getDefinition(), numberOfFacets);
             }
             return cachedResults.get(facetProp);
         }

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java Thu May 21 08:56:02 2020
@@ -47,8 +47,7 @@ public class ElasticsearchSearcher {
     public SearchResponse search(QueryBuilder query, int batchSize) throws IOException {
         SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
                 .query(query)
-                .fetchSource(false)
-                .storedField(FieldNames.PATH)
+                .fetchSource(FieldNames.PATH, null)
                 .size(batchSize);
 
         SearchRequest request = new SearchRequest(indexNode.getDefinition().getRemoteIndexAlias())

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java Thu May 21 08:56:02 2020
@@ -17,6 +17,7 @@
 package org.apache.jackrabbit.oak.plugins.index.elasticsearch.util;
 
 import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
 import org.apache.jackrabbit.oak.spi.query.Filter;
 import org.apache.jackrabbit.oak.spi.query.QueryConstants;
@@ -33,7 +34,7 @@ public final class ElasticsearchAggregat
     private ElasticsearchAggregationBuilderUtil() {
     }
 
-    public static List<TermsAggregationBuilder> getAggregators(QueryIndex.IndexPlan plan, int numberOfFacets) {
+    public static List<TermsAggregationBuilder> getAggregators(QueryIndex.IndexPlan plan, ElasticsearchIndexDefinition indexDefinition, int numberOfFacets) {
         List<TermsAggregationBuilder> termsAggregationBuilders = new LinkedList<>();
         Collection<Filter.PropertyRestriction> propertyRestrictions = plan.getFilter().getPropertyRestrictions();
         for (Filter.PropertyRestriction propertyRestriction : propertyRestrictions) {
@@ -41,13 +42,14 @@ public final class ElasticsearchAggregat
             if (QueryConstants.REP_FACET.equals(name)) {
                 String value = propertyRestriction.first.getValue(Type.STRING);
                 String facetProp = FulltextIndex.parseFacetField(value);
-                termsAggregationBuilders.add(AggregationBuilders.terms(facetProp).field(keywordFieldName(facetProp)).size(numberOfFacets));
+                termsAggregationBuilders.add(
+                        AggregationBuilders
+                                .terms(facetProp)
+                                .field(indexDefinition.getElasticKeyword(facetProp))
+                                .size(numberOfFacets)
+                );
             }
         }
         return termsAggregationBuilders;
     }
-
-    private static String keywordFieldName(String propName) {
-        return propName + "." + "keyword";
-    }
 }

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java Thu May 21 08:56:02 2020
@@ -25,8 +25,7 @@ public class SearchSourceBuilderUtil {
     public static SearchSourceBuilder createSearchSourceBuilder(ElasticsearchSearcherModel elasticsearchSearcherModel) {
         SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
                 .query(elasticsearchSearcherModel.getQueryBuilder())
-                .fetchSource(elasticsearchSearcherModel.fetchSource())
-                .storedField(elasticsearchSearcherModel.getStoredField())
+                .fetchSource(elasticsearchSearcherModel.getStoredField(), null)
                 .size(elasticsearchSearcherModel.getBatchSize())
                 .from(elasticsearchSearcherModel.getFrom());
 

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java Thu May 21 08:56:02 2020
@@ -67,11 +67,11 @@ public class TermQueryBuilderFactory {
     }
 
     public static PrefixQueryBuilder newPrefixQuery(String field, @NotNull String value) {
-        return prefixQuery(keywordFieldName(field), value);
+        return prefixQuery(field, value);
     }
 
     public static WildcardQueryBuilder newWildcardQuery(String field, @NotNull String value) {
-        return wildcardQuery(keywordFieldName(field), value);
+        return wildcardQuery(field, value);
     }
 
     public static TermQueryBuilder newPathQuery(String path) {
@@ -96,11 +96,11 @@ public class TermQueryBuilderFactory {
     }
 
     public static TermQueryBuilder newNodeTypeQuery(String type) {
-        return termQuery(keywordFieldName(JCR_PRIMARYTYPE), type);
+        return termQuery(JCR_PRIMARYTYPE, type);
     }
 
     public static TermQueryBuilder newMixinTypeQuery(String type) {
-        return termQuery(keywordFieldName(JCR_MIXINTYPES), type);
+        return termQuery(JCR_MIXINTYPES, type);
     }
 
     public static TermQueryBuilder newNotNullPropQuery(String propName) {
@@ -126,12 +126,9 @@ public class TermQueryBuilderFactory {
         return bq;
     }
 
-    public static <R> QueryBuilder newPropertyRestrictionQuery(String propertyName, boolean isString,
+    public static <R> QueryBuilder newPropertyRestrictionQuery(String propertyName,
                                                                Filter.PropertyRestriction pr,
                                                                Function<PropertyValue, R> propToObj) {
-        if (isString) {
-            propertyName = keywordFieldName(propertyName);
-        }
 
         R first = pr.first != null ? propToObj.apply(pr.first) : null;
         R last = pr.last != null ? propToObj.apply(pr.last) : null;
@@ -166,9 +163,4 @@ public class TermQueryBuilderFactory {
         }
         return path;
     }
-
-    // As per https://www.elastic.co/blog/strings-are-dead-long-live-strings
-    private static String keywordFieldName(String propName) {
-        return propName + "." + "keyword";
-    }
 }
\ No newline at end of file

Modified: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java Thu May 21 08:56:02 2020
@@ -20,7 +20,7 @@ import org.apache.commons.io.FileUtils;
 import org.apache.jackrabbit.oak.InitialContent;
 import org.apache.jackrabbit.oak.Oak;
 import org.apache.jackrabbit.oak.api.ContentRepository;
-import org.apache.jackrabbit.oak.commons.PerfLogger;
+import org.apache.jackrabbit.oak.api.Tree;
 import org.apache.jackrabbit.oak.plugins.index.AsyncIndexUpdate;
 import org.apache.jackrabbit.oak.plugins.index.IndexEditorProvider;
 import org.apache.jackrabbit.oak.plugins.index.TrackingCorruptIndexHandler;
@@ -37,6 +37,9 @@ import org.apache.jackrabbit.oak.query.A
 import org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.elasticsearch.client.RequestOptions;
+import org.elasticsearch.client.core.CountRequest;
+import org.elasticsearch.client.indices.GetIndexRequest;
 import org.jetbrains.annotations.NotNull;
 import org.junit.After;
 import org.junit.ClassRule;
@@ -44,7 +47,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.net.URI;
 import java.util.concurrent.TimeUnit;
 
 import static com.google.common.collect.Lists.newArrayList;
@@ -56,10 +58,6 @@ public abstract class ElasticsearchAbstr
 
     protected static final Logger LOG = LoggerFactory.getLogger(ElasticsearchAbstractQueryTest.class);
 
-    protected static final PerfLogger PERF_LOGGER =
-            new PerfLogger(LoggerFactory.getLogger(ElasticsearchAbstractQueryTest.class.getName() + ".perf"));
-
-
     // Set this connection string as
     // <scheme>://<hostname>:<port>?key_id=<>,key_secret=<>
     // key_id and key_secret are optional in case the ES server
@@ -72,11 +70,9 @@ public abstract class ElasticsearchAbstr
     // This can be used by the extending classes to trigger the async index update as per need (not having to wait for async indexing cycle)
     protected AsyncIndexUpdate asyncIndexUpdate;
     protected long INDEX_CORRUPT_INTERVAL_IN_MILLIS = 100;
-    protected ElasticsearchIndexEditorProvider editorProvider;
     protected NodeStore nodeStore;
     protected int DEFAULT_ASYNC_INDEXING_TIME_IN_SECONDS = 5;
 
-
     @ClassRule
     public static ElasticsearchConnectionRule elasticRule = new ElasticsearchConnectionRule(elasticConnectionString);
 
@@ -131,12 +127,11 @@ public abstract class ElasticsearchAbstr
     }
 
     protected Oak addAsyncIndexingLanesToOak(Oak oak) {
-        // Override this in extending clases to configure different
+        // Override this in extending classes to configure different
         // indexing lanes with different time limits.
         return oak.withAsyncIndexing("async", DEFAULT_ASYNC_INDEXING_TIME_IN_SECONDS);
     }
 
-
     @Override
     protected ContentRepository createRepository() {
 
@@ -156,7 +151,6 @@ public abstract class ElasticsearchAbstr
         trackingCorruptIndexHandler.setCorruptInterval(INDEX_CORRUPT_INTERVAL_IN_MILLIS, TimeUnit.MILLISECONDS);
         asyncIndexUpdate.setCorruptIndexHandler(trackingCorruptIndexHandler);
 
-
         Oak oak = new Oak(nodeStore)
                 .with(getInitialContent())
                 .with(new OpenSecurityProvider())
@@ -171,7 +165,6 @@ public abstract class ElasticsearchAbstr
         return oak.createContentRepository();
     }
 
-
     protected static void assertEventually(Runnable r) {
         ElasticsearchTestUtils.assertEventually(r, BULK_FLUSH_INTERVAL_MS_DEFAULT * 5);
     }
@@ -188,8 +181,8 @@ public abstract class ElasticsearchAbstr
         return builder;
     }
 
-    protected void setIndex(String idxName, IndexDefinitionBuilder builder) {
-        builder.build(root.getTree("/").addChild(INDEX_DEFINITIONS_NAME).addChild(idxName));
+    protected Tree setIndex(String idxName, IndexDefinitionBuilder builder) {
+        return builder.build(root.getTree("/").addChild(INDEX_DEFINITIONS_NAME).addChild(idxName));
     }
 
     protected String explain(String query) {
@@ -206,4 +199,36 @@ public abstract class ElasticsearchAbstr
         setTraversalEnabled(false);
     }
 
+    // Utility methods that access Elasticsearch directly
+
+    protected boolean exists(Tree index) {
+        ElasticsearchIndexDefinition esIdxDef = getElasticsearchIndexDefinition(index);
+
+        try {
+            return esConnection.getClient().indices()
+                    .exists(new GetIndexRequest(esIdxDef.getRemoteIndexAlias()), RequestOptions.DEFAULT);
+        } catch (IOException e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
+    protected long countDocuments(Tree index) {
+        ElasticsearchIndexDefinition esIdxDef = getElasticsearchIndexDefinition(index);
+
+        CountRequest request = new CountRequest(esIdxDef.getRemoteIndexAlias());
+        try {
+            return esConnection.getClient().count(request, RequestOptions.DEFAULT).getCount();
+        } catch (IOException e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
+    private ElasticsearchIndexDefinition getElasticsearchIndexDefinition(Tree index) {
+        return new ElasticsearchIndexDefinition(
+                nodeStore.getRoot(),
+                nodeStore.getRoot().getChildNode(INDEX_DEFINITIONS_NAME).getChildNode(index.getName()),
+                index.getPath(),
+                esConnection.getIndexPrefix());
+    }
+
 }

Added: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java?rev=1877992&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java Thu May 21 08:56:02 2020
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch;
+
+import org.apache.jackrabbit.oak.api.Tree;
+import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.util.Random;
+import java.util.UUID;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.junit.Assert.assertTrue;
+
+public class ElasticsearchContentTest extends ElasticsearchAbstractQueryTest {
+
+    @Test
+    public void indexWithAnalyzedProperty() throws Exception {
+        IndexDefinitionBuilder builder = createIndex("a").noAsync();
+        builder.indexRule("nt:base").property("a").analyzed();
+        String testId = UUID.randomUUID().toString();
+        Tree index = setIndex(testId, builder);
+        root.commit();
+
+        assertTrue(exists(index));
+        assertThat(0L, equalTo(countDocuments(index)));
+
+        Tree content = root.getTree("/").addChild(testId);
+        content.addChild("indexed").setProperty("a", "foo");
+        content.addChild("not-indexed").setProperty("b", "foo");
+        root.commit();
+
+        assertEventually(() -> assertThat(countDocuments(index), equalTo(1L)));
+
+        content.getChild("indexed").remove();
+        root.commit();
+
+        assertEventually(() -> assertThat(countDocuments(index), equalTo(0L)));
+
+        // TODO: should the index be deleted when the definition gets removed?
+        //index.remove();
+        //root.commit();
+
+        //assertFalse(exists(index));
+    }
+
+    @Test
+    @Ignore("this test fails because of a bug with nodeScopeIndex (every node gets indexed in an empty doc)")
+    public void indexWithAnalyzedNodeScopeIndexProperty() throws Exception {
+        IndexDefinitionBuilder builder = createIndex("a").noAsync();
+        builder.indexRule("nt:base").property("a").analyzed().nodeScopeIndex();
+        String testId = UUID.randomUUID().toString();
+        Tree index = setIndex(testId, builder);
+        root.commit();
+
+        assertThat(0L, equalTo(countDocuments(index)));
+
+        Tree content = root.getTree("/").addChild(testId);
+        content.addChild("indexed").setProperty("a", "foo");
+        content.addChild("not-indexed").setProperty("b", "foo");
+        root.commit();
+
+        assertEventually(() -> assertThat(countDocuments(index), equalTo(1L)));
+    }
+
+    @Test
+    public void indexContentWithLongPath() throws Exception {
+        IndexDefinitionBuilder builder = createIndex("a").noAsync();
+        builder.indexRule("nt:base").property("a").analyzed();
+        String testId = UUID.randomUUID().toString();
+        Tree index = setIndex(testId, builder);
+        root.commit();
+
+        assertTrue(exists(index));
+        assertThat(0L, equalTo(countDocuments(index)));
+
+        int leftLimit = 48; // char '0'
+        int rightLimit = 122; // char 'z'
+        int targetStringLength = 1024;
+        final Random random = new Random(42);
+
+        String generatedPath = random.ints(leftLimit, rightLimit + 1)
+                .limit(targetStringLength)
+                .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
+                .toString();
+
+        Tree content = root.getTree("/").addChild(testId);
+        content.addChild(generatedPath).setProperty("a", "foo");
+        root.commit();
+
+        assertEventually(() -> assertThat(countDocuments(index), equalTo(1L)));
+    }
+
+    @Test
+    public void defineIndexTwice() throws Exception {
+        IndexDefinitionBuilder builder = createIndex("a").noAsync();
+        String testId = UUID.randomUUID().toString();
+        Tree index = setIndex(testId, builder);
+        root.commit();
+
+        assertTrue(exists(index));
+
+        builder = createIndex("a").noAsync();
+        index = setIndex(testId, builder);
+        root.commit();
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java
------------------------------------------------------------------------------
    svn:eol-style = native