You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ng...@apache.org on 2020/05/21 08:56:03 UTC
svn commit: r1877992 [1/2] - in /jackrabbit/oak/trunk: oak-search-elastic/
oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/
oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/fa...
Author: ngupta
Date: Thu May 21 08:56:02 2020
New Revision: 1877992
URL: http://svn.apache.org/viewvc?rev=1877992&view=rev
Log:
OAK-9061|oak-search-elastic: mapping consistent with index definition (committing patch by Fabrizio)
Added:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java (with props)
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java (with props)
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelperTest.java (with props)
Modified:
jackrabbit/oak/trunk/oak-search-elastic/pom.xml
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchFacetTest.java
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchFullTextAsyncTest.java
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchPropertyIndexTest.java
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriterTest.java
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java
Modified: jackrabbit/oak/trunk/oak-search-elastic/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/pom.xml?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/pom.xml Thu May 21 08:56:02 2020
@@ -239,7 +239,7 @@
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-core</artifactId>
- <version>1.3</version>
+ <version>2.2</version>
<scope>test</scope>
</dependency>
<dependency>
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java Thu May 21 08:56:02 2020
@@ -18,11 +18,17 @@
*/
package org.apache.jackrabbit.oak.plugins.index.elasticsearch;
+import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
+import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
import org.apache.jackrabbit.oak.spi.state.NodeState;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
@@ -56,6 +62,16 @@ public class ElasticsearchIndexDefinitio
.map(Object::toString)
.collect(Collectors.joining("")));
+ private static final Function<Integer, Boolean> isAnalyzable;
+
+ static {
+ int[] NOT_ANALYZED_TYPES = new int[] {
+ Type.BINARY.tag(), Type.LONG.tag(), Type.DOUBLE.tag(), Type.DECIMAL.tag(), Type.DATE.tag(), Type.BOOLEAN.tag()
+ };
+ Arrays.sort(NOT_ANALYZED_TYPES); // need for binary search
+ isAnalyzable = type -> Arrays.binarySearch(NOT_ANALYZED_TYPES, type) < 0;
+ }
+
private final String remoteIndexName;
public final int bulkActions;
@@ -66,6 +82,8 @@ public class ElasticsearchIndexDefinitio
private final String indexPrefix;
private final String remoteAlias;
+ private final Map<String, List<PropertyDefinition>> propertiesByName;
+
public ElasticsearchIndexDefinition(NodeState root, NodeState defn, String indexPath, String indexPrefix) {
super(root, getIndexDefinitionState(defn), determineIndexFormatVersion(defn), determineUniqueId(defn), indexPath);
boolean isReindex = defn.getBoolean(IndexConstants.REINDEX_PROPERTY_NAME);
@@ -78,6 +96,12 @@ public class ElasticsearchIndexDefinitio
this.bulkFlushIntervalMs = getOptionalValue(defn, BULK_FLUSH_INTERVAL_MS, BULK_FLUSH_INTERVAL_MS_DEFAULT);
this.bulkRetries = getOptionalValue(defn, BULK_RETRIES, BULK_RETRIES_DEFAULT);
this.bulkRetriesBackoff = getOptionalValue(defn, BULK_RETRIES_BACKOFF, BULK_RETRIES_BACKOFF_DEFAULT);
+
+ this.propertiesByName = getDefinedRules()
+ .stream()
+ .flatMap(rule -> StreamSupport.stream(rule.getProperties().spliterator(), false))
+ .filter(pd -> pd.index) // keep only properties that can be indexed
+ .collect(Collectors.groupingBy(pd -> pd.name));
}
/**
@@ -99,6 +123,35 @@ public class ElasticsearchIndexDefinitio
return remoteIndexName;
}
+ public Map<String, List<PropertyDefinition>> getPropertiesByName() {
+ return propertiesByName;
+ }
+
+ /**
+ * Returns the keyword field name mapped in Elasticsearch for the specified property name.
+ * @param propertyName the property name in the index rules
+ * @return the field name identifier in Elasticsearch
+ * @throws IllegalArgumentException if the specified name is not part of this {@code ElasticsearchIndexDefinition}
+ */
+ public String getElasticKeyword(String propertyName) {
+ List<PropertyDefinition> propertyDefinitions = propertiesByName.get(propertyName);
+ if (propertyDefinitions == null) {
+ throw new IllegalArgumentException(propertyName + " is not part of this ElasticsearchIndexDefinition");
+ }
+
+ String field = propertyName;
+ // it's ok to look at the first property since we are sure they all have the same type
+ int type = propertyDefinitions.get(0).getType();
+ if (isAnalyzable.apply(type) && isAnalyzed(propertyDefinitions)) {
+ field += ".keyword";
+ }
+ return field;
+ }
+
+ public boolean isAnalyzed(List<PropertyDefinition> propertyDefinitions) {
+ return propertyDefinitions.stream().anyMatch(pd -> pd.analyzed || pd.fulltextEnabled());
+ }
+
private String setupAlias() {
// TODO: implement advanced remote index name strategy that takes into account multiple tenants and re-index process
return getESSafeIndexName(indexPrefix + "." + getIndexPath());
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java Thu May 21 08:56:02 2020
@@ -20,17 +20,17 @@ package org.apache.jackrabbit.oak.plugin
import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchIndexNode;
import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.SecureFacetConfiguration;
import org.apache.jackrabbit.oak.spi.query.Filter;
import org.apache.jackrabbit.oak.spi.query.QueryIndex;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.SearchHit;
-import java.io.UnsupportedEncodingException;
-import java.net.URLDecoder;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
+import java.util.Map;
public class ElasticFacetHelper {
@@ -58,32 +58,28 @@ public class ElasticFacetHelper {
return elasticsearchFacets;
}
- public static List<String> getAccessibleDocIds(SearchHit[] searchHits, Filter filter) throws UnsupportedEncodingException {
+ public static List<String> getAccessibleDocIds(SearchHit[] searchHits, Filter filter) {
List<String> accessibleDocs = new LinkedList<>();
for (SearchHit searchHit : searchHits) {
- String id = searchHit.getId();
- String path = idToPath(id);
+ final Map<String, Object> sourceMap = searchHit.getSourceAsMap();
+ String path = (String) sourceMap.get(FieldNames.PATH);
if (filter.isAccessible(path)) {
- accessibleDocs.add(id);
+ accessibleDocs.add(path);
}
}
return accessibleDocs;
}
- public static int getAccessibleDocCount(Iterator<SearchHit> searchHitIterator, Filter filter) throws UnsupportedEncodingException {
+ public static int getAccessibleDocCount(Iterator<SearchHit> searchHitIterator, Filter filter) {
int count = 0;
while (searchHitIterator.hasNext()) {
SearchHit searchHit = searchHitIterator.next();
- String id = searchHit.getId();
- String path = idToPath(id);
+ final Map<String, Object> sourceMap = searchHit.getSourceAsMap();
+ String path = (String) sourceMap.get(FieldNames.PATH);
if (filter.isAccessible(path)) {
count++;
}
}
return count;
}
-
- public static String idToPath(String id) throws UnsupportedEncodingException {
- return URLDecoder.decode(id, "UTF-8");
- }
}
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java Thu May 21 08:56:02 2020
@@ -16,6 +16,7 @@
*/
package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
import org.apache.jackrabbit.oak.spi.query.QueryIndex;
@@ -49,7 +50,8 @@ public interface ElasticsearchFacets {
* @return A map with facetName as key and List of facets in descending order of facetCount.
* @throws IOException
*/
- Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException;
+ Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition,
+ int numberOfFacets) throws IOException;
/**
* We can retrieve Aggregation in a single call to elastic search while querying. Which can then be passed
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java Thu May 21 08:56:02 2020
@@ -16,6 +16,7 @@
*/
package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel;
import org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchAggregationBuilderUtil;
@@ -51,12 +52,13 @@ public class InsecureElasticSearchFacets
}
@Override
- public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException {
+ public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition,
+ int numberOfFacets) throws IOException {
if (elasticsearchAggregationData != null && numberOfFacets <= elasticsearchAggregationData.getNumberOfFacets()) {
return changeToFacetList(elasticsearchAggregationData.getAggregations().getAsMap(), numberOfFacets);
}
LOG.warn("Facet data is being retrieved by again calling Elasticsearch");
- List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil.getAggregators(plan, numberOfFacets);
+ List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil.getAggregators(plan, indexDefinition, numberOfFacets);
ElasticsearchSearcherModel elasticsearchSearcherModel = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
.withQuery(query)
.withAggregation(aggregationBuilders)
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java Thu May 21 08:56:02 2020
@@ -16,6 +16,7 @@
*/
package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel;
import org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchAggregationBuilderUtil;
@@ -50,7 +51,8 @@ public class SecureElasticSearchFacets e
for docs.
*/
@Override
- public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException {
+ public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition,
+ int numberOfFacets) throws IOException {
Map<String, Map<String, Long>> secureFacetCount = new HashMap<>();
Filter filter = getPlan().getFilter();
boolean doFetch = true;
@@ -70,7 +72,8 @@ public class SecureElasticSearchFacets e
List<String> accessibleDocs = ElasticFacetHelper.getAccessibleDocIds(searchHits, filter);
if (accessibleDocs.isEmpty()) continue;
QueryBuilder queryWithAccessibleDocIds = QueryBuilders.termsQuery("_id", accessibleDocs);
- Map<String, Aggregation> accessibleDocsAggregation = getAggregationForDocIds(queryWithAccessibleDocIds, accessibleDocs.size());
+ Map<String, Aggregation> accessibleDocsAggregation = getAggregationForDocIds(queryWithAccessibleDocIds,
+ accessibleDocs.size(), indexDefinition);
collateAggregations(secureFacetCount, accessibleDocsAggregation);
}
@@ -115,15 +118,15 @@ public class SecureElasticSearchFacets e
}
}
- private Map<String, Aggregation> getAggregationForDocIds(QueryBuilder queryWithAccessibleDocIds, int facetCount) throws IOException {
- List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil.getAggregators(getPlan(), facetCount);
+ private Map<String, Aggregation> getAggregationForDocIds(QueryBuilder queryWithAccessibleDocIds, int facetCount,
+ ElasticsearchIndexDefinition indexDefinition) throws IOException {
+ List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil.getAggregators(getPlan(), indexDefinition, facetCount);
ElasticsearchSearcherModel idBasedelasticsearchSearcherModelWithAggregation = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
.withQuery(queryWithAccessibleDocIds)
.withAggregation(aggregationBuilders)
.build();
SearchResponse facetDocs = getSearcher().search(idBasedelasticsearchSearcherModelWithAggregation);
- Map<String, Aggregation> aggregationMap = facetDocs.getAggregations().asMap();
- return aggregationMap;
+ return facetDocs.getAggregations().asMap();
}
}
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java Thu May 21 08:56:02 2020
@@ -17,6 +17,7 @@
package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
import com.google.common.collect.AbstractIterator;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel;
import org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchConstants;
@@ -52,7 +53,9 @@ public class StatisticalElasticSearchFac
this.secureFacetConfiguration = secureFacetConfiguration;
}
- public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException {
+ @Override
+ public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition,
+ int numberOfFacets) throws IOException {
Map<String, List<FulltextIndex.Facet>> result = new HashMap<>();
Map<String, List<FulltextIndex.Facet>> topChildren;
Filter filter = getPlan().getFilter();
@@ -61,7 +64,7 @@ public class StatisticalElasticSearchFac
ElasticsearchAggregationData aggregationData = getElasticsearchAggregationData();
if (aggregationData == null || aggregationData.getNumberOfFacets() < numberOfFacets) {
LOG.warn("Facets and Totalhit count are being retrieved by calling Elasticsearch");
- topChildren = super.getElasticSearchFacets(numberOfFacets);
+ topChildren = super.getElasticSearchFacets(indexDefinition, numberOfFacets);
ElasticsearchSearcherModel elasticsearchSearcherModel = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
.withQuery(getQuery())
.withBatchSize(ElasticsearchConstants.ELASTICSEARCH_QUERY_BATCH_SIZE)
@@ -79,7 +82,8 @@ public class StatisticalElasticSearchFac
// instead of statistical count. <OAK-8138>
if (hitCount < sampleSize) {
LOG.debug("SampleSize: {} is greater than hitcount: {}, Getting secure facet count", sampleSize, hitCount);
- return new SecureElasticSearchFacets(getSearcher(), getQuery(), getPlan()).getElasticSearchFacets(numberOfFacets);
+ return new SecureElasticSearchFacets(getSearcher(), getQuery(), getPlan()).getElasticSearchFacets(indexDefinition,
+ numberOfFacets);
}
long randomSeed = secureFacetConfiguration.getRandomSeed();
Iterator<SearchHit> docIterator = getMatchingDocIterator(getSearcher(), getQuery());
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java Thu May 21 08:56:02 2020
@@ -25,8 +25,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -35,10 +33,7 @@ import java.util.Map;
class ElasticsearchDocument {
private static final Logger LOG = LoggerFactory.getLogger(ElasticsearchDocument.class);
- // id should only be useful for logging (at least as of now)
private final String path;
-
- private final String id;
private final List<String> fulltext;
private final List<String> suggest;
private final List<String> notNullProps;
@@ -47,13 +42,6 @@ class ElasticsearchDocument {
ElasticsearchDocument(String path) {
this.path = path;
- String id = null;
- try {
- id = pathToId(path);
- } catch (UnsupportedEncodingException e) {
- LOG.warn("Couldn't encode {} as ES id", path);
- }
- this.id = id;
this.fulltext = new ArrayList<>();
this.suggest = new ArrayList<>();
this.notNullProps = new ArrayList<>();
@@ -93,24 +81,20 @@ class ElasticsearchDocument {
String parPath = PathUtils.getParentPath(path);
int depth = PathUtils.getDepth(path);
- // TODO: remember that mapping must be configured with
- // https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pathhierarchy-tokenizer.html
addProperty(FieldNames.ANCESTORS, parPath);
addProperty(FieldNames.PATH_DEPTH, depth);
}
- String getId() {
- return id;
- }
-
public String build() {
- String ret = null;
+ String ret;
try {
XContentBuilder builder = XContentFactory.jsonBuilder();
builder.startObject();
- if (fulltext.size() > 0) {
- builder.field(FieldNames.FULLTEXT, fulltext);
- }
+ {
+ builder.field(FieldNames.PATH, path);
+ if (fulltext.size() > 0) {
+ builder.field(FieldNames.FULLTEXT, fulltext);
+ }
if (suggest.size() > 0) {
builder.startObject(FieldNames.SUGGEST).field("input", suggest).endObject();
}
@@ -120,17 +104,18 @@ class ElasticsearchDocument {
if (nullProps.size() > 0) {
builder.field(FieldNames.NULL_PROPS, nullProps);
}
- for (Map.Entry<String, Object> prop : properties.entrySet()) {
- builder.field(prop.getKey(), prop.getValue());
+ for (Map.Entry<String, Object> prop : properties.entrySet()) {
+ builder.field(prop.getKey(), prop.getValue());
+ }
}
builder.endObject();
ret = Strings.toString(builder);
} catch (IOException e) {
- LOG.error("Error serializing document - id: {}, properties: {}, fulltext: {}, suggest: {}, " +
+ LOG.error("Error serializing document - path: {}, properties: {}, fulltext: {}, suggest: {}, " +
"notNullProps: {}, nullProps: {}",
- path, properties, fulltext, suggest, notNullProps, nullProps,
- e);
+ path, properties, fulltext, suggest, notNullProps, nullProps, e);
+ ret = null;
}
return ret;
@@ -140,8 +125,4 @@ class ElasticsearchDocument {
public String toString() {
return build();
}
-
- public static String pathToId(String path) throws UnsupportedEncodingException {
- return URLEncoder.encode(path, "UTF-8");
- }
}
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java Thu May 21 08:56:02 2020
@@ -48,10 +48,9 @@ class ElasticsearchDocumentMaker extends
}
@Override
- protected ElasticsearchDocument finalizeDoc(ElasticsearchDocument doc, boolean dirty, boolean facet) throws IOException {
- if (doc.getId() == null) {
- throw new IOException("Couldn't generate id for doc - (More details during initDoc)" + doc);
- }
+ protected ElasticsearchDocument finalizeDoc(ElasticsearchDocument doc, boolean dirty, boolean facet) {
+ // Evaluating path restrictions is enabled by default in elastic, so ancestors are always indexed
+ doc.indexAncestors(path);
return doc;
}
@@ -134,10 +133,13 @@ class ElasticsearchDocumentMaker extends
doc.addProperty(pname, f);
}
+ /**
+ * Empty method implementation. Ancestors are always indexed
+ *
+ * @see ElasticsearchDocumentMaker#finalizeDoc
+ */
@Override
- protected void indexAncestors(ElasticsearchDocument doc, String path) {
- doc.indexAncestors(path);
- }
+ protected void indexAncestors(ElasticsearchDocument doc, String path) { /* empty */ }
@Override
protected void indexNotNullProperty(ElasticsearchDocument doc, PropertyDefinition pd) {
Added: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java?rev=1877992&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java Thu May 21 08:56:02 2020
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch.index;
+
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
+import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
+import org.elasticsearch.client.indices.CreateIndexRequest;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentFactory;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Provides utility functions around Elasticsearch indexing
+ */
+class ElasticsearchIndexHelper {
+
+ public static CreateIndexRequest createIndexRequest(ElasticsearchIndexDefinition indexDefinition) throws IOException {
+ final CreateIndexRequest request = new CreateIndexRequest(indexDefinition.getRemoteIndexName());
+
+ // provision settings
+ // https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pathhierarchy-tokenizer.html
+ request.settings(Settings.builder()
+ .put("analysis.analyzer.ancestor_analyzer.type", "custom")
+ .put("analysis.analyzer.ancestor_analyzer.tokenizer", "path_hierarchy"));
+
+ // provision mappings
+ final XContentBuilder mappingBuilder = XContentFactory.jsonBuilder();
+ mappingBuilder.startObject();
+ {
+ mappingBuilder.startObject("properties");
+ {
+ mapInternalProperties(mappingBuilder);
+ mapIndexRules(indexDefinition, mappingBuilder);
+ }
+ mappingBuilder.endObject();
+ }
+ mappingBuilder.endObject();
+ request.mapping(mappingBuilder);
+
+ return request;
+ }
+
+ private static void mapInternalProperties(XContentBuilder mappingBuilder) throws IOException {
+ mappingBuilder.startObject(FieldNames.PATH)
+ .field("type", "keyword")
+ .endObject();
+ mappingBuilder.startObject(FieldNames.ANCESTORS)
+ .field("type", "text")
+ .field("analyzer", "ancestor_analyzer")
+ .field("search_analyzer", "keyword")
+ .field("search_quote_analyzer", "keyword")
+ .endObject();
+ mappingBuilder.startObject(FieldNames.PATH_DEPTH)
+ .field("type", "integer")
+ .endObject();
+ // TODO: the mappings below are for features that are not supported yet. They need to be reviewed
+ // when the specific features are implemented
+// mappingBuilder.startObject(FieldNames.SUGGEST)
+// .field("type", "completion")
+// .endObject();
+// mappingBuilder.startObject(FieldNames.NOT_NULL_PROPS)
+// .field("type", "keyword")
+// .endObject();
+// mappingBuilder.startObject(FieldNames.NULL_PROPS)
+// .field("type", "keyword")
+// .endObject();
+ }
+
+ private static void mapIndexRules(ElasticsearchIndexDefinition indexDefinition, XContentBuilder mappingBuilder) throws IOException {
+ // we need to check if in the defined rules there are properties with the same name and different types
+ final List<Map.Entry<String, List<PropertyDefinition>>> multiTypesFields = indexDefinition.getPropertiesByName()
+ .entrySet()
+ .stream()
+ .filter(e -> e.getValue().size() > 1)
+ .filter(e -> e.getValue().stream().map(PropertyDefinition::getType).distinct().count() > 1)
+ .collect(Collectors.toList());
+
+ if (!multiTypesFields.isEmpty()) {
+ String fields = multiTypesFields.stream().map(Map.Entry::getKey).collect(Collectors.joining(", ", "[", "]"));
+ throw new IllegalStateException(indexDefinition.getIndexPath() + " has properties with the same name and " +
+ "different types " + fields);
+ }
+
+ for (Map.Entry<String, List<PropertyDefinition>> entry : indexDefinition.getPropertiesByName().entrySet()) {
+ final String name = entry.getKey();
+ final List<PropertyDefinition> propertyDefinitions = entry.getValue();
+
+ Type<?> type = Type.fromTag(propertyDefinitions.get(0).getType(), false);
+ mappingBuilder.startObject(name);
+ {
+ // https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html
+ if (Type.BINARY.equals(type)) {
+ mappingBuilder.field("type", "binary");
+ } else if (Type.LONG.equals(type)) {
+ mappingBuilder.field("type", "long");
+ } else if (Type.DOUBLE.equals(type) || Type.DECIMAL.equals(type)) {
+ mappingBuilder.field("type", "double");
+ } else if (Type.DATE.equals(type)) {
+ mappingBuilder.field("type", "date");
+ } else if (Type.BOOLEAN.equals(type)) {
+ mappingBuilder.field("type", "boolean");
+ } else {
+ if (indexDefinition.isAnalyzed(propertyDefinitions)) {
+ mappingBuilder.field("type", "text");
+ // always add keyword for sorting / faceting as sub-field
+ mappingBuilder.startObject("fields");
+ {
+ mappingBuilder.startObject("keyword")
+ .field("type", "keyword")
+ .endObject();
+ }
+ mappingBuilder.endObject();
+ } else {
+ // always add keyword for sorting / faceting
+ mappingBuilder.field("type", "keyword");
+ }
+ }
+ }
+ mappingBuilder.endObject();
+ }
+ }
+}
Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java Thu May 21 08:56:02 2020
@@ -18,8 +18,8 @@ package org.apache.jackrabbit.oak.plugin
import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchConnection;
import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
-import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexWriter;
+import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.action.DocWriteRequest;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest;
import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest;
@@ -37,13 +37,11 @@ import org.elasticsearch.client.IndicesC
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
+import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.cluster.metadata.AliasMetaData;
import org.elasticsearch.common.Strings;
-import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
-import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.TestOnly;
@@ -51,12 +49,17 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.Phaser;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
import java.util.Map;
-import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
-import static org.apache.jackrabbit.oak.plugins.index.elasticsearch.index.ElasticsearchDocument.pathToId;
import static org.elasticsearch.common.xcontent.ToXContent.EMPTY_PARAMS;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
@@ -66,8 +69,19 @@ class ElasticsearchIndexWriter implement
private final ElasticsearchConnection elasticsearchConnection;
private final ElasticsearchIndexDefinition indexDefinition;
+ /**
+ * Coordinates communication between bulk processes. It has a main controller registered at creation time and
+ * de-registered on {@link ElasticsearchIndexWriter#close(long)}. Each bulk request registers a new party in
+ * this Phaser in {@link OakBulkProcessorListener#beforeBulk(long, BulkRequest)} and de-registers itself when
+ * the request returns.
+ */
+ private final Phaser phaser = new Phaser(1); // register main controller
+ /**
+ * Key-value structure to keep the history of bulk requests. Keys are the bulk execution ids, the boolean
+ * value is {@code true} when at least one update is performed, otherwise {@code false}.
+ */
+ private final ConcurrentHashMap<Long, Boolean> updatesMap = new ConcurrentHashMap<>();
private final BulkProcessor bulkProcessor;
- private Optional<Boolean> indexUpdated = Optional.empty();
ElasticsearchIndexWriter(@NotNull ElasticsearchConnection elasticsearchConnection,
@NotNull ElasticsearchIndexDefinition indexDefinition) {
@@ -99,70 +113,76 @@ class ElasticsearchIndexWriter implement
}
@Override
- public void updateDocument(String path, ElasticsearchDocument doc) throws IOException {
+ public void updateDocument(String path, ElasticsearchDocument doc) {
IndexRequest request = new IndexRequest(indexDefinition.getRemoteIndexAlias())
- .id(pathToId(path))
+ .id(idFromPath(path))
.source(doc.build(), XContentType.JSON);
bulkProcessor.add(request);
}
@Override
- public void deleteDocuments(String path) throws IOException {
+ public void deleteDocuments(String path) {
DeleteRequest request = new DeleteRequest(indexDefinition.getRemoteIndexAlias())
- .id(pathToId(path));
+ .id(idFromPath(path));
bulkProcessor.add(request);
}
@Override
- public boolean close(long timestamp) throws IOException {
+ public boolean close(long timestamp) {
LOG.trace("Calling close on bulk processor {}", bulkProcessor);
bulkProcessor.close();
LOG.trace("Bulk Processor {} closed", bulkProcessor);
- // bulkProcessor.close() calls the OakBulkProcessorListener.beforeBulk in a blocking manner
- // indexUpdated would be unset there if it was false till now (not even a single update succeeded)
- // in this case wait for sometime for the last OakBulkProcessorListener.afterBulk to be called
- // where indexUpdated can possibly be set to true, return false in case of timeout.
- // We don't wait in case indexUpdated is already set (This would be if any of the previous flushes for this processor
- // were successful i.e index was updated at least once)
- final long start = System.currentTimeMillis();
- long timeoutMillis = indexDefinition.bulkFlushIntervalMs * 5 ;
- while (!indexUpdated.isPresent()) {
- long lastAttempt = System.currentTimeMillis();
- long elapsedTime = lastAttempt - start;
- if (elapsedTime > timeoutMillis) {
- // indexUpdate was not set till now, return false
- LOG.trace("Timed out waiting for the bulk processor response. Returning indexUpdated = false");
- return false;
- } else {
- try {
- LOG.trace("Waiting for afterBulk response...");
- Thread.sleep(100);
- } catch (InterruptedException ex) {
- //
- }
- }
+ // de-register main controller
+ final int phase = phaser.arriveAndDeregister();
+
+ try {
+ phaser.awaitAdvanceInterruptibly(phase, indexDefinition.bulkFlushIntervalMs * 5, TimeUnit.MILLISECONDS);
+ } catch (InterruptedException | TimeoutException e) {
+ LOG.error("Error waiting for bulk requests to return", e);
+ }
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Bulk identifier -> update status = {}", updatesMap);
}
- LOG.trace("Returning indexUpdated = {}", indexUpdated.get());
- return indexUpdated.get();
+ return updatesMap.containsValue(Boolean.TRUE);
}
- // TODO: we need to check if the index already exists and in that case we have to figure out if there are
- // "breaking changes" in the index definition
protected void provisionIndex() throws IOException {
+ // check if index already exists
+ boolean exists = elasticsearchConnection.getClient().indices().exists(
+ new GetIndexRequest(indexDefinition.getRemoteIndexName()), RequestOptions.DEFAULT
+ );
+ if (exists) {
+ LOG.info("Index {} already exists. Skip index provision", indexDefinition.getRemoteIndexName());
+ return;
+ }
- IndicesClient indicesClient = elasticsearchConnection.getClient().indices();
+ final IndicesClient indicesClient = elasticsearchConnection.getClient().indices();
final String indexName = indexDefinition.getRemoteIndexName();
- CreateIndexRequest createIndexRequest = constructCreateIndexRequest(indexName);
- String requestMsg = Strings.toString(createIndexRequest.toXContent(jsonBuilder(), EMPTY_PARAMS));
- CreateIndexResponse response = indicesClient.create(createIndexRequest, RequestOptions.DEFAULT);
- checkResponseAcknowledgement(response, "Create index call not acknowledged for index " + indexName);
-
- LOG.info("Updated settings for index {} = {}. Response acknowledged: {}",
- indexDefinition.getRemoteIndexAlias(), requestMsg, response.isAcknowledged());
-
+ // create the new index
+ final CreateIndexRequest request = ElasticsearchIndexHelper.createIndexRequest(indexDefinition);
+ try {
+ if (LOG.isDebugEnabled()) {
+ final String requestMsg = Strings.toString(request.toXContent(jsonBuilder(), EMPTY_PARAMS));
+ LOG.debug("Creating Index with request {}", requestMsg);
+ }
+ CreateIndexResponse response = indicesClient.create(request, RequestOptions.DEFAULT);
+ LOG.info("Updated settings for index {}. Response acknowledged: {}",
+ indexDefinition.getRemoteIndexName(), response.isAcknowledged());
+ checkResponseAcknowledgement(response, "Create index call not acknowledged for index " + indexName);
+ } catch (ElasticsearchStatusException ese) {
+ // We already check index existence as first thing in this method, if we get here it means we have got into
+ // a conflict (eg: multiple cluster nodes provision concurrently).
+ // Elasticsearch does not have a CREATE IF NOT EXIST, need to inspect exception
+ // https://github.com/elastic/elasticsearch/issues/19862
+ if (ese.status().getStatus() == 400 && ese.getDetailedMessage().contains("resource_already_exists_exception")) {
+ LOG.warn("Index {} already exists. Ignoring error", indexName);
+ } else throw ese;
+ }
+ // update the alias so that it points to the new index
GetAliasesRequest getAliasesRequest = new GetAliasesRequest(indexDefinition.getRemoteIndexAlias());
GetAliasesResponse aliasesResponse = indicesClient.getAlias(getAliasesRequest, RequestOptions.DEFAULT);
Map<String, Set<AliasMetaData>> aliases = aliasesResponse.getAliases();
@@ -180,6 +200,8 @@ class ElasticsearchIndexWriter implement
+ indexDefinition.getRemoteIndexAlias());
LOG.info("Updated alias {} to index {}. Response acknowledged: {}", indexDefinition.getRemoteIndexAlias(),
indexName, updateAliasResponse.isAcknowledged());
+
+ // once the alias has been updated, we can safely remove the old index
deleteOldIndices(indicesClient, aliases.keySet());
}
@@ -201,57 +223,15 @@ class ElasticsearchIndexWriter implement
LOG.info("Deleted indices {}. Response acknowledged: {}", indices.toString(), deleteIndexResponse.isAcknowledged());
}
- private CreateIndexRequest constructCreateIndexRequest(String indexName) throws IOException {
- CreateIndexRequest request = new CreateIndexRequest(indexName);
-
- // provision settings
- request.settings(Settings.builder()
- .put("analysis.analyzer.ancestor_analyzer.type", "custom")
- .put("analysis.analyzer.ancestor_analyzer.tokenizer", "path_hierarchy"));
-
- // provision mappings
- XContentBuilder mappingBuilder = XContentFactory.jsonBuilder();
- mappingBuilder.startObject();
- {
- mappingBuilder.startObject("properties");
- {
- mappingBuilder.startObject(FieldNames.ANCESTORS)
- .field("type", "text")
- .field("analyzer", "ancestor_analyzer")
- .field("search_analyzer", "keyword")
- .field("search_quote_analyzer", "keyword")
- .endObject();
- mappingBuilder.startObject(FieldNames.PATH_DEPTH)
- .field("type", "integer")
- .endObject();
- mappingBuilder.startObject(FieldNames.SUGGEST)
- .field("type", "completion")
- .endObject();
- mappingBuilder.startObject(FieldNames.NOT_NULL_PROPS)
- .field("type", "keyword")
- .endObject();
- mappingBuilder.startObject(FieldNames.NULL_PROPS)
- .field("type", "keyword")
- .endObject();
- }
- mappingBuilder.endObject();
- }
- mappingBuilder.endObject();
- request.mapping(mappingBuilder);
-
- return request;
- }
-
private class OakBulkProcessorListener implements BulkProcessor.Listener {
@Override
public void beforeBulk(long executionId, BulkRequest bulkRequest) {
- if (indexUpdated.isPresent() && !indexUpdated.get()) {
- // Reset the state only if it's false
- // If it's true that means index was updated at least once by this processor
- // and we can return true for indexUpdate.
- indexUpdated = Optional.empty();
- }
+ // register new bulk party
+ phaser.register();
+ // init update status
+ updatesMap.put(executionId, Boolean.FALSE);
+
LOG.info("Sending bulk with id {} -> {}", executionId, bulkRequest.getDescription());
if (LOG.isTraceEnabled()) {
LOG.trace("Bulk Requests: \n{}", bulkRequest.requests()
@@ -279,29 +259,41 @@ class ElasticsearchIndexWriter implement
LOG.error("Bulk item with id {} failed", failure.getId(), failure.getCause());
} else {
// Set indexUpdated to true even if 1 item was updated successfully
- indexUpdated = Optional.of(true);
+ updatesMap.put(executionId, Boolean.TRUE);
}
}
- // Only set indexUpdated to false if it's unset
- // If set and true, that means index was updated at least once by this processor.
- // If set and false, no need to do anything
- if (!indexUpdated.isPresent()) {
- indexUpdated = Optional.of(false);
- }
} else {
- indexUpdated = Optional.of(true);
+ updatesMap.put(executionId, Boolean.TRUE);
}
+ phaser.arriveAndDeregister();
}
@Override
public void afterBulk(long executionId, BulkRequest bulkRequest, Throwable throwable) {
- // Only set indexUpdated to false if it's unset
- // If set and true, that means index was updated at least once by this processor.
- // If set and false, no need to do anything
- if (!indexUpdated.isPresent()) {
- indexUpdated = Optional.of(false);
- }
LOG.error("Bulk with id {} threw an error", executionId, throwable);
+ phaser.arriveAndDeregister();
+ }
+ }
+
+ /**
+ * Transforms a path into an _id compatible with the Elasticsearch specification. The _id cannot be larger than 512
+ * bytes. For performance reasons, paths that already fit within this limit are returned untouched. Otherwise, the
+ * SHA-256 digest of the path (32 bytes) is converted to a string and returned as the _id.
+ *
+ * @param path the document path
+ * @return the Elasticsearch compatible path
+ * @see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-id-field.html">
+ * Mapping _id field</a>
+ */
+ private static String idFromPath(@NotNull String path) {
+ byte[] pathBytes = path.getBytes(StandardCharsets.UTF_8);
+ if (pathBytes.length > 512) {
+ try {
+ return new String(MessageDigest.getInstance("SHA-256").digest(pathBytes));
+ } catch (NoSuchAlgorithmException e) {
+ throw new IllegalStateException(e);
+ }
}
+ return path;
}
}
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java Thu May 21 08:56:02 2020
@@ -90,7 +90,9 @@ class ElasticsearchIndex extends Fulltex
@Override
protected String getFulltextRequestString(IndexPlan plan, IndexNode indexNode) {
- return Strings.toString(ElasticsearchResultRowIterator.getESQuery(plan, getPlanResult(plan)));
+ return Strings.toString(new ElasticsearchResultRowIterator(plan.getFilter(), getPlanResult(plan), plan,
+ acquireIndexNode(plan), FulltextIndex::shouldInclude, getEstimator(plan.getPlanName()))
+ .getESQuery(plan, getPlanResult(plan)));
}
@Override
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java Thu May 21 08:56:02 2020
@@ -53,8 +53,6 @@ import org.slf4j.LoggerFactory;
import javax.jcr.PropertyType;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.URLDecoder;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
@@ -68,7 +66,6 @@ import java.util.stream.StreamSupport;
import static org.apache.jackrabbit.JcrConstants.JCR_MIXINTYPES;
import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE;
-import static org.apache.jackrabbit.oak.api.Type.STRING;
import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot;
import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath;
import static org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.TermQueryBuilderFactory.newAncestorQuery;
@@ -158,9 +155,10 @@ class ElasticsearchResultRowIterator imp
try {
ElasticsearchSearcher searcher = getCurrentSearcher(indexNode);
QueryBuilder query = getESQuery(plan, planResult);
- int numberOfFacets = indexNode.getDefinition().getNumberOfTopFacets();
+ ElasticsearchIndexDefinition indexDefinition = indexNode.getDefinition();
+ int numberOfFacets = indexDefinition.getNumberOfTopFacets();
List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil
- .getAggregators(plan, numberOfFacets);
+ .getAggregators(plan, indexDefinition, numberOfFacets);
ElasticsearchSearcherModel elasticsearchSearcherModel = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
.withQuery(query)
@@ -236,10 +234,9 @@ class ElasticsearchResultRowIterator imp
return new ElasticsearchSearcher(indexNode);
}
- private FulltextIndex.FulltextResultRow convertToRow(SearchHit hit,
- ElasticsearchFacetProvider elasticsearchFacetProvider) throws IOException {
- String id = hit.getId();
- String path = idToPath(id);
+ private FulltextIndex.FulltextResultRow convertToRow(SearchHit hit, ElasticsearchFacetProvider elasticsearchFacetProvider) {
+ final Map<String, Object> sourceMap = hit.getSourceAsMap();
+ String path = (String) sourceMap.get(FieldNames.PATH);
if (path != null) {
if ("".equals(path)) {
path = "/";
@@ -276,7 +273,7 @@ class ElasticsearchResultRowIterator imp
* @param planResult
* @return the Lucene query
*/
- static QueryBuilder getESQuery(IndexPlan plan, PlanResult planResult) {
+ public QueryBuilder getESQuery(IndexPlan plan, PlanResult planResult) {
List<QueryBuilder> qs = new ArrayList<>();
Filter filter = plan.getFilter();
FullTextExpression ft = filter.getFullTextConstraint();
@@ -504,13 +501,12 @@ class ElasticsearchResultRowIterator imp
return unwrapped;
}
- private static void addNonFullTextConstraints(List<QueryBuilder> qs,
+ private void addNonFullTextConstraints(List<QueryBuilder> qs,
IndexPlan plan, PlanResult planResult) {
final BiPredicate<Iterable<String>, String> any = (iterable, value) ->
StreamSupport.stream(iterable.spliterator(), false).anyMatch(value::equals);
Filter filter = plan.getFilter();
- IndexDefinition defn = planResult.indexDefinition;
if (!filter.matchesAllTypes()) {
addNodeTypeConstraints(planResult.indexingRule, qs, filter);
}
@@ -518,20 +514,15 @@ class ElasticsearchResultRowIterator imp
String path = FulltextIndex.getPathRestriction(plan);
switch (filter.getPathRestriction()) {
case ALL_CHILDREN:
- if (defn.evaluatePathRestrictions()) {
- if ("/".equals(path)) {
- break;
- }
+ if (!"/".equals(path)) {
qs.add(newAncestorQuery(path));
}
break;
case DIRECT_CHILDREN:
- if (defn.evaluatePathRestrictions()) {
- BoolQueryBuilder bq = boolQuery();
- bq.must(newAncestorQuery(path));
- bq.must(newDepthQuery(path, planResult));
- qs.add(bq);
- }
+ BoolQueryBuilder bq = boolQuery();
+ bq.must(newAncestorQuery(path));
+ bq.must(newDepthQuery(path, planResult));
+ qs.add(bq);
break;
case EXACT:
// For transformed paths, we can only add path restriction if absolute path to property can be
@@ -588,7 +579,7 @@ class ElasticsearchResultRowIterator imp
if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding
&& pr.lastIncluding) {
- String first = pr.first.getValue(STRING);
+ String first = pr.first.getValue(Type.STRING);
first = first.replace("\\", "");
if (JCR_PATH.equals(name)) {
qs.add(newPathQuery(first));
@@ -637,7 +628,7 @@ class ElasticsearchResultRowIterator imp
}
private static QueryBuilder createNodeNameQuery(Filter.PropertyRestriction pr) {
- String first = pr.first != null ? pr.first.getValue(STRING) : null;
+ String first = pr.first != null ? pr.first.getValue(Type.STRING) : null;
if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding
&& pr.lastIncluding) {
// [property]=[value]
@@ -685,7 +676,7 @@ class ElasticsearchResultRowIterator imp
}
@Nullable
- private static QueryBuilder createQuery(String propertyName, Filter.PropertyRestriction pr,
+ private QueryBuilder createQuery(String propertyName, Filter.PropertyRestriction pr,
PropertyDefinition defn) {
int propType = FulltextIndex.determinePropertyType(defn, pr);
@@ -699,31 +690,29 @@ class ElasticsearchResultRowIterator imp
return newNotNullPropQuery(defn.name);
}
+ final String field = indexNode.getDefinition().getElasticKeyword(propertyName);
+
QueryBuilder in;
switch (propType) {
case PropertyType.DATE: {
- in = newPropertyRestrictionQuery(propertyName, false, pr,
- value -> parse(value.getValue(Type.DATE)).getTime());
+ in = newPropertyRestrictionQuery(field, pr, value -> parse(value.getValue(Type.DATE)).getTime());
break;
}
case PropertyType.DOUBLE: {
- in = newPropertyRestrictionQuery(propertyName, false, pr,
- value -> value.getValue(Type.DOUBLE));
+ in = newPropertyRestrictionQuery(field, pr, value -> value.getValue(Type.DOUBLE));
break;
}
case PropertyType.LONG: {
- in = newPropertyRestrictionQuery(propertyName, false, pr,
- value -> value.getValue(Type.LONG));
+ in = newPropertyRestrictionQuery(field, pr, value -> value.getValue(Type.LONG));
break;
}
default: {
if (pr.isLike) {
- return createLikeQuery(propertyName, pr.first.getValue(STRING));
+ return createLikeQuery(propertyName, pr.first.getValue(Type.STRING));
}
//TODO Confirm that all other types can be treated as string
- in = newPropertyRestrictionQuery(propertyName, true, pr,
- value -> value.getValue(Type.STRING));
+ in = newPropertyRestrictionQuery(field, pr, value -> value.getValue(Type.STRING));
}
}
@@ -734,10 +723,6 @@ class ElasticsearchResultRowIterator imp
throw new IllegalStateException("PropertyRestriction not handled " + pr + " for index " + defn);
}
- private static String idToPath(String id) throws UnsupportedEncodingException {
- return URLDecoder.decode(id, "UTF-8");
- }
-
class ElasticsearchFacetProvider implements FulltextIndex.FacetProvider {
private ElasticsearchFacets elasticsearchFacets;
private Map<String, List<FulltextIndex.Facet>> cachedResults = new HashMap<>();
@@ -750,7 +735,7 @@ class ElasticsearchResultRowIterator imp
public List<FulltextIndex.Facet> getFacets(int numberOfFacets, String columnName) throws IOException {
String facetProp = FulltextIndex.parseFacetField(columnName);
if (cachedResults.get(facetProp) == null) {
- cachedResults = elasticsearchFacets.getElasticSearchFacets(numberOfFacets);
+ cachedResults = elasticsearchFacets.getElasticSearchFacets(indexNode.getDefinition(), numberOfFacets);
}
return cachedResults.get(facetProp);
}
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java Thu May 21 08:56:02 2020
@@ -47,8 +47,7 @@ public class ElasticsearchSearcher {
public SearchResponse search(QueryBuilder query, int batchSize) throws IOException {
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
.query(query)
- .fetchSource(false)
- .storedField(FieldNames.PATH)
+ .fetchSource(FieldNames.PATH, null)
.size(batchSize);
SearchRequest request = new SearchRequest(indexNode.getDefinition().getRemoteIndexAlias())
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java Thu May 21 08:56:02 2020
@@ -17,6 +17,7 @@
package org.apache.jackrabbit.oak.plugins.index.elasticsearch.util;
import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
import org.apache.jackrabbit.oak.spi.query.Filter;
import org.apache.jackrabbit.oak.spi.query.QueryConstants;
@@ -33,7 +34,7 @@ public final class ElasticsearchAggregat
private ElasticsearchAggregationBuilderUtil() {
}
- public static List<TermsAggregationBuilder> getAggregators(QueryIndex.IndexPlan plan, int numberOfFacets) {
+ public static List<TermsAggregationBuilder> getAggregators(QueryIndex.IndexPlan plan, ElasticsearchIndexDefinition indexDefinition, int numberOfFacets) {
List<TermsAggregationBuilder> termsAggregationBuilders = new LinkedList<>();
Collection<Filter.PropertyRestriction> propertyRestrictions = plan.getFilter().getPropertyRestrictions();
for (Filter.PropertyRestriction propertyRestriction : propertyRestrictions) {
@@ -41,13 +42,14 @@ public final class ElasticsearchAggregat
if (QueryConstants.REP_FACET.equals(name)) {
String value = propertyRestriction.first.getValue(Type.STRING);
String facetProp = FulltextIndex.parseFacetField(value);
- termsAggregationBuilders.add(AggregationBuilders.terms(facetProp).field(keywordFieldName(facetProp)).size(numberOfFacets));
+ termsAggregationBuilders.add(
+ AggregationBuilders
+ .terms(facetProp)
+ .field(indexDefinition.getElasticKeyword(facetProp))
+ .size(numberOfFacets)
+ );
}
}
return termsAggregationBuilders;
}
-
- private static String keywordFieldName(String propName) {
- return propName + "." + "keyword";
- }
}
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java Thu May 21 08:56:02 2020
@@ -25,8 +25,7 @@ public class SearchSourceBuilderUtil {
public static SearchSourceBuilder createSearchSourceBuilder(ElasticsearchSearcherModel elasticsearchSearcherModel) {
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
.query(elasticsearchSearcherModel.getQueryBuilder())
- .fetchSource(elasticsearchSearcherModel.fetchSource())
- .storedField(elasticsearchSearcherModel.getStoredField())
+ .fetchSource(elasticsearchSearcherModel.getStoredField(), null)
.size(elasticsearchSearcherModel.getBatchSize())
.from(elasticsearchSearcherModel.getFrom());
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java Thu May 21 08:56:02 2020
@@ -67,11 +67,11 @@ public class TermQueryBuilderFactory {
}
public static PrefixQueryBuilder newPrefixQuery(String field, @NotNull String value) {
- return prefixQuery(keywordFieldName(field), value);
+ return prefixQuery(field, value);
}
public static WildcardQueryBuilder newWildcardQuery(String field, @NotNull String value) {
- return wildcardQuery(keywordFieldName(field), value);
+ return wildcardQuery(field, value);
}
public static TermQueryBuilder newPathQuery(String path) {
@@ -96,11 +96,11 @@ public class TermQueryBuilderFactory {
}
public static TermQueryBuilder newNodeTypeQuery(String type) {
- return termQuery(keywordFieldName(JCR_PRIMARYTYPE), type);
+ return termQuery(JCR_PRIMARYTYPE, type);
}
public static TermQueryBuilder newMixinTypeQuery(String type) {
- return termQuery(keywordFieldName(JCR_MIXINTYPES), type);
+ return termQuery(JCR_MIXINTYPES, type);
}
public static TermQueryBuilder newNotNullPropQuery(String propName) {
@@ -126,12 +126,9 @@ public class TermQueryBuilderFactory {
return bq;
}
- public static <R> QueryBuilder newPropertyRestrictionQuery(String propertyName, boolean isString,
+ public static <R> QueryBuilder newPropertyRestrictionQuery(String propertyName,
Filter.PropertyRestriction pr,
Function<PropertyValue, R> propToObj) {
- if (isString) {
- propertyName = keywordFieldName(propertyName);
- }
R first = pr.first != null ? propToObj.apply(pr.first) : null;
R last = pr.last != null ? propToObj.apply(pr.last) : null;
@@ -166,9 +163,4 @@ public class TermQueryBuilderFactory {
}
return path;
}
-
- // As per https://www.elastic.co/blog/strings-are-dead-long-live-strings
- private static String keywordFieldName(String propName) {
- return propName + "." + "keyword";
- }
}
\ No newline at end of file
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java Thu May 21 08:56:02 2020
@@ -20,7 +20,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.jackrabbit.oak.InitialContent;
import org.apache.jackrabbit.oak.Oak;
import org.apache.jackrabbit.oak.api.ContentRepository;
-import org.apache.jackrabbit.oak.commons.PerfLogger;
+import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.plugins.index.AsyncIndexUpdate;
import org.apache.jackrabbit.oak.plugins.index.IndexEditorProvider;
import org.apache.jackrabbit.oak.plugins.index.TrackingCorruptIndexHandler;
@@ -37,6 +37,9 @@ import org.apache.jackrabbit.oak.query.A
import org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.elasticsearch.client.RequestOptions;
+import org.elasticsearch.client.core.CountRequest;
+import org.elasticsearch.client.indices.GetIndexRequest;
import org.jetbrains.annotations.NotNull;
import org.junit.After;
import org.junit.ClassRule;
@@ -44,7 +47,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
-import java.net.URI;
import java.util.concurrent.TimeUnit;
import static com.google.common.collect.Lists.newArrayList;
@@ -56,10 +58,6 @@ public abstract class ElasticsearchAbstr
protected static final Logger LOG = LoggerFactory.getLogger(ElasticsearchAbstractQueryTest.class);
- protected static final PerfLogger PERF_LOGGER =
- new PerfLogger(LoggerFactory.getLogger(ElasticsearchAbstractQueryTest.class.getName() + ".perf"));
-
-
// Set this connection string as
// <scheme>://<hostname>:<port>?key_id=<>,key_secret=<>
// key_id and key_secret are optional in case the ES server
@@ -72,11 +70,9 @@ public abstract class ElasticsearchAbstr
// This can be used by the extending classes to trigger the async index update as per need (not having to wait for async indexing cycle)
protected AsyncIndexUpdate asyncIndexUpdate;
protected long INDEX_CORRUPT_INTERVAL_IN_MILLIS = 100;
- protected ElasticsearchIndexEditorProvider editorProvider;
protected NodeStore nodeStore;
protected int DEFAULT_ASYNC_INDEXING_TIME_IN_SECONDS = 5;
-
@ClassRule
public static ElasticsearchConnectionRule elasticRule = new ElasticsearchConnectionRule(elasticConnectionString);
@@ -131,12 +127,11 @@ public abstract class ElasticsearchAbstr
}
protected Oak addAsyncIndexingLanesToOak(Oak oak) {
- // Override this in extending clases to configure different
+ // Override this in extending classes to configure different
// indexing lanes with different time limits.
return oak.withAsyncIndexing("async", DEFAULT_ASYNC_INDEXING_TIME_IN_SECONDS);
}
-
@Override
protected ContentRepository createRepository() {
@@ -156,7 +151,6 @@ public abstract class ElasticsearchAbstr
trackingCorruptIndexHandler.setCorruptInterval(INDEX_CORRUPT_INTERVAL_IN_MILLIS, TimeUnit.MILLISECONDS);
asyncIndexUpdate.setCorruptIndexHandler(trackingCorruptIndexHandler);
-
Oak oak = new Oak(nodeStore)
.with(getInitialContent())
.with(new OpenSecurityProvider())
@@ -171,7 +165,6 @@ public abstract class ElasticsearchAbstr
return oak.createContentRepository();
}
-
protected static void assertEventually(Runnable r) {
ElasticsearchTestUtils.assertEventually(r, BULK_FLUSH_INTERVAL_MS_DEFAULT * 5);
}
@@ -188,8 +181,8 @@ public abstract class ElasticsearchAbstr
return builder;
}
- protected void setIndex(String idxName, IndexDefinitionBuilder builder) {
- builder.build(root.getTree("/").addChild(INDEX_DEFINITIONS_NAME).addChild(idxName));
+ protected Tree setIndex(String idxName, IndexDefinitionBuilder builder) {
+ return builder.build(root.getTree("/").addChild(INDEX_DEFINITIONS_NAME).addChild(idxName));
}
protected String explain(String query) {
@@ -206,4 +199,36 @@ public abstract class ElasticsearchAbstr
setTraversalEnabled(false);
}
+ // Utility methods that access Elasticsearch directly
+
+ protected boolean exists(Tree index) {
+ ElasticsearchIndexDefinition esIdxDef = getElasticsearchIndexDefinition(index);
+
+ try {
+ return esConnection.getClient().indices()
+ .exists(new GetIndexRequest(esIdxDef.getRemoteIndexAlias()), RequestOptions.DEFAULT);
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ protected long countDocuments(Tree index) {
+ ElasticsearchIndexDefinition esIdxDef = getElasticsearchIndexDefinition(index);
+
+ CountRequest request = new CountRequest(esIdxDef.getRemoteIndexAlias());
+ try {
+ return esConnection.getClient().count(request, RequestOptions.DEFAULT).getCount();
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ private ElasticsearchIndexDefinition getElasticsearchIndexDefinition(Tree index) {
+ return new ElasticsearchIndexDefinition(
+ nodeStore.getRoot(),
+ nodeStore.getRoot().getChildNode(INDEX_DEFINITIONS_NAME).getChildNode(index.getName()),
+ index.getPath(),
+ esConnection.getIndexPrefix());
+ }
+
}
Added: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java?rev=1877992&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java Thu May 21 08:56:02 2020
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch;
+
+import org.apache.jackrabbit.oak.api.Tree;
+import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.util.Random;
+import java.util.UUID;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.junit.Assert.assertTrue;
+
+public class ElasticsearchContentTest extends ElasticsearchAbstractQueryTest {
+
+ @Test
+ public void indexWithAnalyzedProperty() throws Exception {
+ IndexDefinitionBuilder builder = createIndex("a").noAsync();
+ builder.indexRule("nt:base").property("a").analyzed();
+ String testId = UUID.randomUUID().toString();
+ Tree index = setIndex(testId, builder);
+ root.commit();
+
+ assertTrue(exists(index));
+ assertThat(0L, equalTo(countDocuments(index)));
+
+ Tree content = root.getTree("/").addChild(testId);
+ content.addChild("indexed").setProperty("a", "foo");
+ content.addChild("not-indexed").setProperty("b", "foo");
+ root.commit();
+
+ assertEventually(() -> assertThat(countDocuments(index), equalTo(1L)));
+
+ content.getChild("indexed").remove();
+ root.commit();
+
+ assertEventually(() -> assertThat(countDocuments(index), equalTo(0L)));
+
+ // TODO: should the index be deleted when the definition gets removed?
+ //index.remove();
+ //root.commit();
+
+ //assertFalse(exists(index));
+ }
+
+ @Test
+ @Ignore("this test fails because of a bug with nodeScopeIndex (every node gets indexed in an empty doc)")
+ public void indexWithAnalyzedNodeScopeIndexProperty() throws Exception {
+ IndexDefinitionBuilder builder = createIndex("a").noAsync();
+ builder.indexRule("nt:base").property("a").analyzed().nodeScopeIndex();
+ String testId = UUID.randomUUID().toString();
+ Tree index = setIndex(testId, builder);
+ root.commit();
+
+ assertThat(0L, equalTo(countDocuments(index)));
+
+ Tree content = root.getTree("/").addChild(testId);
+ content.addChild("indexed").setProperty("a", "foo");
+ content.addChild("not-indexed").setProperty("b", "foo");
+ root.commit();
+
+ assertEventually(() -> assertThat(countDocuments(index), equalTo(1L)));
+ }
+
+ @Test
+ public void indexContentWithLongPath() throws Exception {
+ IndexDefinitionBuilder builder = createIndex("a").noAsync();
+ builder.indexRule("nt:base").property("a").analyzed();
+ String testId = UUID.randomUUID().toString();
+ Tree index = setIndex(testId, builder);
+ root.commit();
+
+ assertTrue(exists(index));
+ assertThat(0L, equalTo(countDocuments(index)));
+
+ int leftLimit = 48; // char '0'
+ int rightLimit = 122; // char 'z'
+ int targetStringLength = 1024;
+ final Random random = new Random(42);
+
+ String generatedPath = random.ints(leftLimit, rightLimit + 1)
+ .limit(targetStringLength)
+ .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
+ .toString();
+
+ Tree content = root.getTree("/").addChild(testId);
+ content.addChild(generatedPath).setProperty("a", "foo");
+ root.commit();
+
+ assertEventually(() -> assertThat(countDocuments(index), equalTo(1L)));
+ }
+
+ @Test
+ public void defineIndexTwice() throws Exception {
+ IndexDefinitionBuilder builder = createIndex("a").noAsync();
+ String testId = UUID.randomUUID().toString();
+ Tree index = setIndex(testId, builder);
+ root.commit();
+
+ assertTrue(exists(index));
+
+ builder = createIndex("a").noAsync();
+ index = setIndex(testId, builder);
+ root.commit();
+ }
+}
Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java
------------------------------------------------------------------------------
svn:eol-style = native