You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by mk...@apache.org on 2020/05/12 05:40:54 UTC
svn commit: r1877620 [1/2] - in /jackrabbit/oak/trunk:
oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/
oak-search-elastic/
oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/
oak-...
Author: mkataria
Date: Tue May 12 05:40:54 2020
New Revision: 1877620
URL: http://svn.apache.org/viewvc?rev=1877620&view=rev
Log:
OAK-9045: Add facet support for elastic search
Added:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java (with props)
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchAggregationData.java (with props)
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java (with props)
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java (with props)
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java (with props)
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java (with props)
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcherModel.java (with props)
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java (with props)
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchConstants.java (with props)
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java (with props)
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchFacetTest.java (with props)
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/util/TapeSampling.java (with props)
jackrabbit/oak/trunk/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/search/util/TapeSamplingTest.java (with props)
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/TapeSampling.java
jackrabbit/oak/trunk/oak-search-elastic/pom.xml
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexNode.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchTestUtils.java
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/TapeSampling.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/TapeSampling.java?rev=1877620&r1=1877619&r2=1877620&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/TapeSampling.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/TapeSampling.java Tue May 12 05:40:54 2020
@@ -25,19 +25,9 @@ import java.util.Iterator;
import java.util.Random;
/**
- * Sampling algorithm that picks 'k' random samples from streaming input.
- * The algorithm would maintain 'k/N' probability to pick any of the item
- * where 'N' is the number of items seen currently.
- *
- * While the input could be streaming, the algorithm requires {@code N} to be known
- * before hand.
- *
- * The algorithm produces random saamples without replacement and hence has O(1) extra
- * memory complexity
- *
- * Implementation inspired from "JONES,T.G. A note on sampling a tape file"
- * (https://dl.acm.org/citation.cfm?id=368159)
+ * @deprecated Class moved to package org.apache.jackrabbit.oak.plugins.index.search.util in oak-search
*/
+@Deprecated
public class TapeSampling<T> {
private final Random rGen;
private final Iterator<T> input;
Modified: jackrabbit/oak/trunk/oak-search-elastic/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/pom.xml?rev=1877620&r1=1877619&r2=1877620&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/pom.xml Tue May 12 05:40:54 2020
@@ -248,6 +248,12 @@
<version>1.12.5</version>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.jackrabbit</groupId>
+ <artifactId>oak-jcr</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
</dependencies>
</project>
Added: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java?rev=1877620&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java Tue May 12 05:40:54 2020
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
+
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchIndexNode;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
+import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.SecureFacetConfiguration;
+import org.apache.jackrabbit.oak.spi.query.Filter;
+import org.apache.jackrabbit.oak.spi.query.QueryIndex;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.search.SearchHit;
+
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Static helpers shared by the Elasticsearch facet implementations: selecting the
+ * facet strategy for an index and filtering search hits through the query
+ * {@link Filter}'s access check.
+ */
+public class ElasticFacetHelper {
+
+    private ElasticFacetHelper() {
+        // static helpers only
+    }
+
+    /**
+     * Creates the facet implementation that matches the secure-facet mode configured
+     * on the definition of {@code indexNode}.
+     *
+     * @param searcher searcher handed to the created facet implementation
+     * @param query query handed to the created facet implementation
+     * @param indexNode index node whose definition supplies the secure facet configuration
+     * @param plan index plan handed to the created facet implementation
+     * @param elasticsearchAggregationData aggregation data handed to the INSECURE and
+     *                                     STATISTICAL implementations; not used for SECURE
+     * @return the facet implementation for the configured mode; SECURE is also the fallback
+     *         for any other mode
+     */
+    public static ElasticsearchFacets getAggregates(ElasticsearchSearcher searcher, QueryBuilder query,
+                                                    ElasticsearchIndexNode indexNode, QueryIndex.IndexPlan plan,
+                                                    ElasticsearchAggregationData elasticsearchAggregationData) {
+        SecureFacetConfiguration facetConfig = indexNode.getDefinition().getSecureFacetConfiguration();
+        switch (facetConfig.getMode()) {
+            case INSECURE:
+                return new InsecureElasticSearchFacets(searcher, query, plan, elasticsearchAggregationData);
+            case STATISTICAL:
+                return new StatisticalElasticSearchFacets(searcher, query, plan,
+                        facetConfig, elasticsearchAggregationData);
+            case SECURE:
+            default:
+                return new SecureElasticSearchFacets(searcher, query, plan);
+        }
+    }
+
+    /**
+     * Collects the ids of those hits whose decoded path passes {@code filter.isAccessible}.
+     *
+     * @param searchHits hits to check
+     * @param filter filter providing the access check
+     * @return ids of the accessible hits, in the order of {@code searchHits}
+     * @throws UnsupportedEncodingException propagated from {@link #idToPath(String)}
+     */
+    public static List<String> getAccessibleDocIds(SearchHit[] searchHits, Filter filter) throws UnsupportedEncodingException {
+        List<String> permittedIds = new LinkedList<>();
+        for (SearchHit hit : searchHits) {
+            String docId = hit.getId();
+            if (filter.isAccessible(idToPath(docId))) {
+                permittedIds.add(docId);
+            }
+        }
+        return permittedIds;
+    }
+
+    /**
+     * Counts the hits whose decoded path passes {@code filter.isAccessible}. The iterator
+     * is consumed completely.
+     *
+     * @param searchHitIterator hits to check
+     * @param filter filter providing the access check
+     * @return number of accessible hits
+     * @throws UnsupportedEncodingException propagated from {@link #idToPath(String)}
+     */
+    public static int getAccessibleDocCount(Iterator<SearchHit> searchHitIterator, Filter filter) throws UnsupportedEncodingException {
+        int accessible = 0;
+        while (searchHitIterator.hasNext()) {
+            String docPath = idToPath(searchHitIterator.next().getId());
+            if (filter.isAccessible(docPath)) {
+                accessible++;
+            }
+        }
+        return accessible;
+    }
+
+    /**
+     * URL-decodes a document id with UTF-8 to obtain the path used for access checks.
+     * NOTE(review): presumably document ids are the URL-encoded node path - confirm against the indexer.
+     *
+     * @param id document id
+     * @return the decoded id
+     * @throws UnsupportedEncodingException declared by {@link URLDecoder#decode(String, String)}
+     */
+    public static String idToPath(String id) throws UnsupportedEncodingException {
+        return URLDecoder.decode(id, "UTF-8");
+    }
+}
Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchAggregationData.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchAggregationData.java?rev=1877620&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchAggregationData.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchAggregationData.java Tue May 12 05:40:54 2020
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
+
+import org.elasticsearch.search.aggregations.Aggregations;
+
+/**
+ * Immutable holder for aggregation results that were already fetched from Elasticsearch.
+ * It is used in facets to save an unnecessary call to Elasticsearch,
+ * as this info is also retrieved when calling ES in rowIterator.
+ */
+public class ElasticsearchAggregationData {
+    /** topFacet count from indexDefinition */
+    private final int numberOfFacets;
+    /** total documents in query result */
+    private final long totalDocuments;
+    /** aggregated data for query from ES */
+    private final Aggregations aggregations;
+
+    /**
+     * @param numberOfFacets topFacet count the aggregations were requested with
+     * @param totalDocuments total documents in the query result
+     * @param aggregations aggregated data returned by Elasticsearch for the query
+     */
+    public ElasticsearchAggregationData(int numberOfFacets, long totalDocuments, Aggregations aggregations) {
+        this.numberOfFacets = numberOfFacets;
+        this.totalDocuments = totalDocuments;
+        this.aggregations = aggregations;
+    }
+
+    /**
+     * @return topFacet count from indexDefinition
+     */
+    public int getNumberOfFacets() {
+        return numberOfFacets;
+    }
+
+    /**
+     * @return total documents in query result
+     */
+    public long getTotalDocuments() {
+        return totalDocuments;
+    }
+
+    /**
+     * @return aggregated data for query from ES, exactly as passed to the constructor
+     */
+    public Aggregations getAggregations() {
+        return aggregations;
+    }
+}
Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchAggregationData.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java?rev=1877620&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java Tue May 12 05:40:54 2020
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
+
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
+import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
+import org.apache.jackrabbit.oak.spi.query.QueryIndex;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+public interface ElasticsearchFacets {
+
+ /**
+ * @return ElasticsearchSearcher
+ */
+ ElasticsearchSearcher getSearcher();
+
+ /**
+ * @return QueryBuilder
+ */
+ QueryBuilder getQuery();
+
+ /**
+ * @return QueryIndex.IndexPlan
+ */
+ QueryIndex.IndexPlan getPlan();
+
+ /**
+ * @param numberOfFacets number of topFacets to be returned
+ * @return A map with facetName as key and List of facets in descending order of facetCount.
+ * @throws IOException
+ */
+ Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException;
+
+ /**
+ * We can retrieve Aggregation in a single call to elastic search while querying. Which can then be passed
+ * to ElasticSearchfacets instead of calling ES again to fetch same info. If ElasticsearchAggregationData is null
+ * then we get data by again querying ES
+ *
+ * @return ElasticsearchAggregationData
+ */
+ @Nullable
+ ElasticsearchAggregationData getElasticsearchAggregationData();
+
+ class ElasticSearchFacet {
+
+ private final String label;
+ private final Long count;
+
+ public ElasticSearchFacet(String label, Long count) {
+ this.label = label;
+ this.count = count;
+ }
+
+ @NotNull
+ public String getLabel() {
+ return label;
+ }
+
+ public Long getCount() {
+ return count;
+ }
+
+ public FulltextIndex.Facet convertToFacet() {
+ return new FulltextIndex.Facet(this.getLabel(), Math.toIntExact(this.getCount()));
+ }
+ }
+}
Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java?rev=1877620&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java Tue May 12 05:40:54 2020
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
+
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchAggregationBuilderUtil;
+import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
+import org.apache.jackrabbit.oak.spi.query.QueryIndex;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.search.aggregations.Aggregation;
+import org.elasticsearch.search.aggregations.bucket.terms.Terms;
+import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Facets taken directly from Elasticsearch terms aggregations. No per-document access
+ * check is applied to the counts in this class.
+ */
+public class InsecureElasticSearchFacets implements ElasticsearchFacets {
+    private static final Logger LOG = LoggerFactory.getLogger(InsecureElasticSearchFacets.class);
+
+    private final ElasticsearchSearcher searcher;
+    private final QueryBuilder query;
+    private final QueryIndex.IndexPlan plan;
+    private final ElasticsearchAggregationData elasticsearchAggregationData;
+
+    /**
+     * @param searcher searcher used when Elasticsearch has to be queried for aggregations
+     * @param query query the facets are computed for
+     * @param plan index plan used to build the aggregations
+     * @param elasticsearchAggregationData already fetched aggregation data, or {@code null}
+     *                                     to always query Elasticsearch
+     */
+    public InsecureElasticSearchFacets(ElasticsearchSearcher searcher, QueryBuilder query,
+                                       QueryIndex.IndexPlan plan, ElasticsearchAggregationData elasticsearchAggregationData) {
+        this.searcher = searcher;
+        this.query = query;
+        this.plan = plan;
+        this.elasticsearchAggregationData = elasticsearchAggregationData;
+    }
+
+    /**
+     * Returns the facets for the query. The pre-fetched aggregation data is used when it is
+     * present and was requested with at least {@code numberOfFacets} facets; otherwise a
+     * warning is logged and the query is sent to Elasticsearch again with terms aggregations
+     * built from the plan.
+     *
+     * @param numberOfFacets number of topFacets to be returned
+     * @return map from facet name to at most {@code numberOfFacets} facets
+     * @throws IOException if the search against Elasticsearch fails
+     */
+    @Override
+    public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException {
+        if (elasticsearchAggregationData != null && numberOfFacets <= elasticsearchAggregationData.getNumberOfFacets()) {
+            return changeToFacetList(elasticsearchAggregationData.getAggregations().getAsMap(), numberOfFacets);
+        }
+        LOG.warn("Facet data is being retrieved by again calling Elasticsearch");
+        List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil.getAggregators(plan, numberOfFacets);
+        ElasticsearchSearcherModel elasticsearchSearcherModel = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
+                .withQuery(query)
+                .withAggregation(aggregationBuilders)
+                .build();
+        Map<String, Aggregation> facetResult = searcher.search(elasticsearchSearcherModel).getAggregations().getAsMap();
+        return changeToFacetList(facetResult, numberOfFacets);
+    }
+
+    /**
+     * Converts terms aggregations into facet lists, keeping the buckets in the order
+     * Elasticsearch returned them and truncating each list to {@code topFacetCount}.
+     *
+     * @param docs aggregations keyed by facet name; every value is cast to {@link Terms}
+     * @param topFacetCount maximum number of facets kept per facet name
+     * @return map from facet name to its facets
+     */
+    Map<String, List<FulltextIndex.Facet>> changeToFacetList(Map<String, Aggregation> docs, int topFacetCount) {
+        Map<String, List<FulltextIndex.Facet>> facetMap = new HashMap<>();
+        for (Map.Entry<String, Aggregation> aggregationEntry : docs.entrySet()) {
+            Terms terms = (Terms) aggregationEntry.getValue();
+            List<? extends Terms.Bucket> buckets = terms.getBuckets();
+            List<FulltextIndex.Facet> facetList = new ArrayList<>(buckets.size());
+            for (Terms.Bucket bucket : buckets) {
+                // Facet counts are ints while doc counts are longs: clamp instead of letting a
+                // plain cast wrap large counts around to a negative/garbage value.
+                int facetCount = (int) Math.min(bucket.getDocCount(), Integer.MAX_VALUE);
+                facetList.add(new FulltextIndex.Facet(bucket.getKeyAsString(), facetCount));
+            }
+
+            if (facetList.size() > topFacetCount) {
+                facetMap.put(aggregationEntry.getKey(), facetList.subList(0, topFacetCount));
+            } else {
+                facetMap.put(aggregationEntry.getKey(), facetList);
+            }
+        }
+        return facetMap;
+    }
+
+    @Override
+    public ElasticsearchSearcher getSearcher() {
+        return searcher;
+    }
+
+    @Override
+    public QueryBuilder getQuery() {
+        return query;
+    }
+
+    @Override
+    public QueryIndex.IndexPlan getPlan() {
+        return plan;
+    }
+
+    @Override
+    public ElasticsearchAggregationData getElasticsearchAggregationData() {
+        return elasticsearchAggregationData;
+    }
+}
Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java?rev=1877620&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java Tue May 12 05:40:54 2020
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
+
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchAggregationBuilderUtil;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchConstants;
+import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
+import org.apache.jackrabbit.oak.spi.query.Filter;
+import org.apache.jackrabbit.oak.spi.query.QueryIndex;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.search.aggregations.Aggregation;
+import org.elasticsearch.search.aggregations.bucket.terms.Terms;
+import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.PriorityQueue;
+
+/**
+ * Facets whose counts only include documents that pass the access check of the query
+ * {@link Filter}. The query result is paged through in batches, inaccessible hits are
+ * dropped, and aggregations are requested only for the ids of the accessible hits.
+ */
+public class SecureElasticSearchFacets extends InsecureElasticSearchFacets {
+
+    /**
+     * @param searcher searcher used to run the paging and aggregation queries
+     * @param query query the facets are computed for
+     * @param plan index plan supplying the filter and the aggregations
+     */
+    public SecureElasticSearchFacets(ElasticsearchSearcher searcher, QueryBuilder query,
+                                     QueryIndex.IndexPlan plan) {
+        super(searcher, query, plan, null);
+    }
+
+    /**
+     * Computes access-checked facets.
+     *
+     * We are not using elasticSearch's aggregation on the original query as we have to
+     * fetch each document to validate access rights for docs.
+     *
+     * NOTE(review): paging uses from/size, which Elasticsearch limits through
+     * index.max_result_window - confirm behaviour for large result sets.
+     *
+     * @param numberOfFacets number of topFacets to be returned per facet name
+     * @return map from facet name to at most {@code numberOfFacets} facets in descending
+     *         order of count (ties ordered by label)
+     * @throws IOException if a search against Elasticsearch fails
+     */
+    @Override
+    public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException {
+        Map<String, Map<String, Long>> secureFacetCount = new HashMap<>();
+        Filter filter = getPlan().getFilter();
+        boolean doFetch = true;
+        for (int from = 0; doFetch; from += ElasticsearchConstants.ELASTICSEARCH_QUERY_BATCH_SIZE) {
+            ElasticsearchSearcherModel elasticsearchSearcherModel = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
+                    .withQuery(getQuery())
+                    .withBatchSize(ElasticsearchConstants.ELASTICSEARCH_QUERY_BATCH_SIZE)
+                    .withFrom(from)
+                    .build();
+            SearchResponse docs = getSearcher().search(elasticsearchSearcherModel);
+            SearchHit[] searchHits = docs.getHits().getHits();
+            long totalResults = docs.getHits().getTotalHits().value;
+            // This is the last batch once the current window reaches the reported total
+            // or the batch came back empty.
+            if (totalResults <= from + ElasticsearchConstants.ELASTICSEARCH_QUERY_BATCH_SIZE || searchHits.length == 0) {
+                doFetch = false;
+            }
+
+            List<String> accessibleDocs = ElasticFacetHelper.getAccessibleDocIds(searchHits, filter);
+            if (accessibleDocs.isEmpty()) {
+                continue;
+            }
+            QueryBuilder queryWithAccessibleDocIds = QueryBuilders.termsQuery("_id", accessibleDocs);
+            Map<String, Aggregation> accessibleDocsAggregation = getAggregationForDocIds(queryWithAccessibleDocIds, accessibleDocs.size());
+            collateAggregations(secureFacetCount, accessibleDocsAggregation);
+        }
+
+        Map<String, List<FulltextIndex.Facet>> facetResult = new HashMap<>();
+        for (Map.Entry<String, Map<String, Long>> facetEntry : secureFacetCount.entrySet()) {
+            facetResult.put(facetEntry.getKey(), topFacets(facetEntry.getValue(), numberOfFacets));
+        }
+        return facetResult;
+    }
+
+    /**
+     * Returns the {@code numberOfFacets} labels with the highest counts, highest first.
+     */
+    private static List<FulltextIndex.Facet> topFacets(Map<String, Long> labelCounts, int numberOfFacets) {
+        // Capacity is derived from the data: PriorityQueue rejects an initial capacity < 1.
+        PriorityQueue<ElasticSearchFacet> pq = new PriorityQueue<>(Math.max(1, labelCounts.size()), (o1, o2) -> {
+            int byCount = o2.getCount().compareTo(o1.getCount());
+            // equal counts: order by label so that the result is deterministic
+            return byCount != 0 ? byCount : o1.getLabel().compareTo(o2.getLabel());
+        });
+        for (Map.Entry<String, Long> labelCount : labelCounts.entrySet()) {
+            pq.add(new ElasticSearchFacet(labelCount.getKey(), labelCount.getValue()));
+        }
+        List<FulltextIndex.Facet> fc = new LinkedList<>();
+        // poll() yields elements in priority order; iterating the queue (iterator/forEach) does not.
+        while (!pq.isEmpty() && fc.size() < numberOfFacets) {
+            fc.add(pq.poll().convertToFacet());
+        }
+        return fc;
+    }
+
+    /**
+     * Adds the bucket counts of every terms aggregation in {@code docs} to the running totals.
+     */
+    private void collateAggregations(Map<String, Map<String, Long>> secureFacetCount, Map<String, Aggregation> docs) {
+        for (Map.Entry<String, Aggregation> aggregationEntry : docs.entrySet()) {
+            Terms terms = (Terms) aggregationEntry.getValue();
+            for (Terms.Bucket bucket : terms.getBuckets()) {
+                collateFacetData(secureFacetCount, aggregationEntry.getKey(), bucket.getKeyAsString(), bucket.getDocCount());
+            }
+        }
+    }
+
+    /**
+     * Adds {@code count} to the total kept for {@code label} under {@code facet}, creating
+     * the entries when they do not exist yet.
+     */
+    private void collateFacetData(Map<String, Map<String, Long>> globalData, String facet, String label, Long count) {
+        globalData.computeIfAbsent(facet, key -> new HashMap<>()).merge(label, count, Long::sum);
+    }
+
+    /**
+     * Runs the given id-restricted query with the terms aggregations built from the plan
+     * and returns the resulting aggregations keyed by name.
+     */
+    private Map<String, Aggregation> getAggregationForDocIds(QueryBuilder queryWithAccessibleDocIds, int facetCount) throws IOException {
+        List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil.getAggregators(getPlan(), facetCount);
+        ElasticsearchSearcherModel idBasedelasticsearchSearcherModelWithAggregation = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
+                .withQuery(queryWithAccessibleDocIds)
+                .withAggregation(aggregationBuilders)
+                .build();
+
+        SearchResponse facetDocs = getSearcher().search(idBasedelasticsearchSearcherModelWithAggregation);
+        return facetDocs.getAggregations().asMap();
+    }
+}
Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java?rev=1877620&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java Tue May 12 05:40:54 2020
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
+
+import com.google.common.collect.AbstractIterator;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchConstants;
+import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
+import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
+import org.apache.jackrabbit.oak.plugins.index.search.util.TapeSampling;
+import org.apache.jackrabbit.oak.spi.query.Filter;
+import org.apache.jackrabbit.oak.spi.query.QueryIndex;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.search.SearchHit;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+public class StatisticalElasticSearchFacets extends InsecureElasticSearchFacets {
+ private static final Logger LOG = LoggerFactory.getLogger(StatisticalElasticSearchFacets.class);
+
+ private final IndexDefinition.SecureFacetConfiguration secureFacetConfiguration;
+
+ public StatisticalElasticSearchFacets(ElasticsearchSearcher searcher, QueryBuilder query,
+ QueryIndex.IndexPlan plan, IndexDefinition.SecureFacetConfiguration secureFacetConfiguration,
+ ElasticsearchAggregationData elasticsearchAggregationData) {
+ super(searcher, query, plan, elasticsearchAggregationData);
+ this.secureFacetConfiguration = secureFacetConfiguration;
+ }
+
+ public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException {
+ Map<String, List<FulltextIndex.Facet>> result = new HashMap<>();
+ Map<String, List<FulltextIndex.Facet>> topChildren;
+ Filter filter = getPlan().getFilter();
+ int hitCount;
+ int sampleSize = secureFacetConfiguration.getStatisticalFacetSampleSize();
+ ElasticsearchAggregationData aggregationData = getElasticsearchAggregationData();
+ if (aggregationData == null || aggregationData.getNumberOfFacets() < numberOfFacets) {
+ LOG.warn("Facets and Totalhit count are being retrieved by calling Elasticsearch");
+ topChildren = super.getElasticSearchFacets(numberOfFacets);
+ ElasticsearchSearcherModel elasticsearchSearcherModel = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
+ .withQuery(getQuery())
+ .withBatchSize(ElasticsearchConstants.ELASTICSEARCH_QUERY_BATCH_SIZE)
+ .build();
+ SearchResponse docs = getSearcher().search(elasticsearchSearcherModel);
+ long totalResults = docs.getHits().getTotalHits().value;
+ hitCount = Math.toIntExact(totalResults);
+ } else {
+ topChildren = changeToFacetList(getElasticsearchAggregationData().getAggregations().getAsMap(), numberOfFacets);
+ hitCount = Math.toIntExact(getElasticsearchAggregationData().getTotalDocuments());
+ }
+
+ // In case the hit count is less than sample size(A very small reposiotry perhaps)
+ // Delegate getting FacetResults to SecureSortedSetDocValuesFacetCounts to get the exact count
+ // instead of statistical count. <OAK-8138>
+ if (hitCount < sampleSize) {
+ LOG.debug("SampleSize: {} is greater than hitcount: {}, Getting secure facet count", sampleSize, hitCount);
+ return new SecureElasticSearchFacets(getSearcher(), getQuery(), getPlan()).getElasticSearchFacets(numberOfFacets);
+ }
+ long randomSeed = secureFacetConfiguration.getRandomSeed();
+ Iterator<SearchHit> docIterator = getMatchingDocIterator(getSearcher(), getQuery());
+ Iterator<SearchHit> sampleIterator = docIterator;
+ if (sampleSize < hitCount) {
+ LOG.debug("SampleSize: {} is less than hitcount: {}, sampling data", sampleSize, hitCount);
+ sampleIterator = getSampledMatchingDocIterator(docIterator, randomSeed, hitCount, sampleSize);
+ }
+ int accessibleSampleCount = ElasticFacetHelper.getAccessibleDocCount(sampleIterator, filter);
+ for (String facet : topChildren.keySet()) {
+ List<FulltextIndex.Facet> labelAndValues = topChildren.get(facet);
+ labelAndValues = updateLabelAndValueIfRequired(labelAndValues, sampleSize, accessibleSampleCount);
+ result.put(facet, labelAndValues);
+ }
+ return result;
+ }
+
+ private Iterator<SearchHit> getMatchingDocIterator(ElasticsearchSearcher searcher, QueryBuilder query) {
+ return new AbstractIterator<SearchHit>() {
+ List<SearchHit> matchingDocuments = new LinkedList<>();
+ Iterator<SearchHit> matchingDocsListIterator = matchingDocuments.iterator();
+ int from;
+
+ @Override
+ protected SearchHit computeNext() {
+ try {
+ if (matchingDocsListIterator.hasNext()) {
+ return matchingDocsListIterator.next();
+ } else {
+ ElasticsearchSearcherModel elasticsearchSearcherModel = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
+ .withQuery(query)
+ .withBatchSize(ElasticsearchConstants.ELASTICSEARCH_QUERY_BATCH_SIZE)
+ .withFrom(from)
+ .build();
+ SearchResponse searchResponse = searcher.search(elasticsearchSearcherModel);
+ SearchHit[] searchHits = searchResponse.getHits().getHits();
+ if (searchHits.length == 0 || searchHits.length < ElasticsearchConstants.ELASTICSEARCH_QUERY_BATCH_SIZE) {
+ return endOfData();
+ } else {
+ matchingDocuments = Arrays.asList(searchHits);
+ matchingDocsListIterator = matchingDocuments.iterator();
+ from += ElasticsearchConstants.ELASTICSEARCH_QUERY_BATCH_SIZE;
+ return matchingDocsListIterator.next();
+ }
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ };
+ }
+
+ private Iterator<SearchHit> getSampledMatchingDocIterator(Iterator<SearchHit> matchingDocs,
+ long randomdSeed, int hitCount, int sampleSize) {
+ TapeSampling<SearchHit> tapeSampling = new TapeSampling<>(new Random(randomdSeed), matchingDocs, hitCount, sampleSize);
+
+ return tapeSampling.getSamples();
+ }
+
+ private List<FulltextIndex.Facet> updateLabelAndValueIfRequired(List<FulltextIndex.Facet> labelAndValues,
+ int sampleSize, int accessibleCount) {
+ if (accessibleCount < sampleSize) {
+ int numZeros = 0;
+ List<FulltextIndex.Facet> newValues;
+ {
+ List<FulltextIndex.Facet> proportionedLVs = new LinkedList<>();
+ for (FulltextIndex.Facet labelAndValue : labelAndValues) {
+ long count = labelAndValue.getCount() * accessibleCount / sampleSize;
+ if (count == 0) {
+ numZeros++;
+ }
+ proportionedLVs.add(new FulltextIndex.Facet(labelAndValue.getLabel(), Math.toIntExact(count)));
+ }
+ labelAndValues = proportionedLVs;
+ }
+ if (numZeros > 0) {
+ newValues = new LinkedList<>();
+ for (FulltextIndex.Facet lv : labelAndValues) {
+ if (lv.getCount() > 0) {
+ newValues.add(lv);
+ }
+ }
+ } else {
+ newValues = labelAndValues;
+ }
+ return newValues;
+ } else {
+ return labelAndValues;
+ }
+ }
+}
Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java?rev=1877620&r1=1877619&r2=1877620&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java Tue May 12 05:40:54 2020
@@ -90,7 +90,7 @@ class ElasticsearchIndex extends Fulltex
@Override
protected String getFulltextRequestString(IndexPlan plan, IndexNode indexNode) {
- return Strings.toString(ElasticsearchResultRowIterator.getESRequest(plan, getPlanResult(plan)));
+ return Strings.toString(ElasticsearchResultRowIterator.getESQuery(plan, getPlanResult(plan)));
}
@Override
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexNode.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexNode.java?rev=1877620&r1=1877619&r2=1877620&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexNode.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexNode.java Tue May 12 05:40:54 2020
@@ -25,7 +25,7 @@ import org.apache.jackrabbit.oak.spi.sta
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
-class ElasticsearchIndexNode implements IndexNode {
+public class ElasticsearchIndexNode implements IndexNode {
private final ElasticsearchConnection elasticsearchConnection;
private final ElasticsearchIndexDefinition indexDefinition;
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java?rev=1877620&r1=1877619&r2=1877620&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java Tue May 12 05:40:54 2020
@@ -20,6 +20,11 @@ import org.apache.jackrabbit.oak.api.Typ
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.commons.PerfLogger;
import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets.ElasticFacetHelper;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets.ElasticsearchAggregationData;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets.ElasticsearchFacets;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchAggregationBuilderUtil;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchConstants;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
@@ -40,6 +45,7 @@ import org.elasticsearch.index.query.Boo
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
@@ -52,8 +58,10 @@ import java.net.URLDecoder;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
+import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiPredicate;
import java.util.stream.StreamSupport;
@@ -93,35 +101,28 @@ class ElasticsearchResultRowIterator imp
private static final char WILDCARD_STRING = '*';
private static final char WILDCARD_CHAR = '?';
- /**
- * Batch size for fetching results from queries.
- */
- private static final int ELASTICSEARCH_QUERY_BATCH_SIZE = 1000;
-
- private static final int ELASTICSEARCH_QUERY_MAX_BATCH_SIZE = 10000;
-
private final Deque<FulltextIndex.FulltextResultRow> queue = new ArrayDeque<>();
// TODO : find if ES can return dup docs - if so how to avoid
// private final Set<String> seenPaths = Sets.newHashSet();
private SearchHit lastDoc;
- private int nextBatchSize = ELASTICSEARCH_QUERY_BATCH_SIZE;
+ private int nextBatchSize = ElasticsearchConstants.ELASTICSEARCH_QUERY_BATCH_SIZE;
private boolean noDocs = false;
private final Filter filter;
- private final PlanResult pr;
+ private final PlanResult planResult;
private final IndexPlan plan;
private final ElasticsearchIndexNode indexNode;
private final RowInclusionPredicate rowInclusionPredicate;
private final LMSEstimator estimator;
ElasticsearchResultRowIterator(@NotNull Filter filter,
- @NotNull PlanResult pr,
+ @NotNull PlanResult planResult,
@NotNull IndexPlan plan,
ElasticsearchIndexNode indexNode,
RowInclusionPredicate rowInclusionPredicate,
LMSEstimator estimator) {
this.filter = filter;
- this.pr = pr;
+ this.planResult = planResult;
this.plan = plan;
this.indexNode = indexNode;
this.rowInclusionPredicate = rowInclusionPredicate != null ? rowInclusionPredicate : RowInclusionPredicate.NOOP;
@@ -156,14 +157,27 @@ class ElasticsearchResultRowIterator imp
SearchHit lastDocToRecord = null;
try {
ElasticsearchSearcher searcher = getCurrentSearcher(indexNode);
- QueryBuilder query = getESRequest(plan, pr);
+ QueryBuilder query = getESQuery(plan, planResult);
+ int numberOfFacets = indexNode.getDefinition().getNumberOfTopFacets();
+ List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil
+ .getAggregators(plan, numberOfFacets);
+
+ ElasticsearchSearcherModel elasticsearchSearcherModel = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
+ .withQuery(query)
+ .withBatchSize(nextBatchSize)
+ .withAggregation(aggregationBuilders)
+ .build();
+
// TODO: custom scoring
SearchResponse docs;
long start = PERF_LOGGER.start();
while (true) {
LOG.debug("loading {} entries for query {}", nextBatchSize, query);
- docs = searcher.search(query, nextBatchSize);
+ docs = searcher.search(elasticsearchSearcherModel);
+ long totalHits = docs.getHits().getTotalHits().value;
+ ElasticsearchAggregationData elasticsearchAggregationData =
+ new ElasticsearchAggregationData(numberOfFacets, totalHits, docs.getAggregations());
SearchHit[] searchHits = docs.getHits().getHits();
PERF_LOGGER.end(start, -1, "{} ...", searchHits.length);
@@ -174,9 +188,9 @@ class ElasticsearchResultRowIterator imp
noDocs = true;
}
- nextBatchSize = (int) Math.min(nextBatchSize * 2L, ELASTICSEARCH_QUERY_MAX_BATCH_SIZE);
+ nextBatchSize = (int) Math.min(nextBatchSize * 2L, ElasticsearchConstants.ELASTICSEARCH_QUERY_MAX_BATCH_SIZE);
- // TODO: faceting
+ ElasticsearchFacetProvider elasticsearchFacetProvider = new ElasticsearchFacetProvider(ElasticFacetHelper.getAggregates(searcher, query, indexNode, plan, elasticsearchAggregationData));
// TODO: excerpt
@@ -187,7 +201,7 @@ class ElasticsearchResultRowIterator imp
for (SearchHit doc : searchHits) {
// TODO : excerpts
- FulltextIndex.FulltextResultRow row = convertToRow(doc);
+ FulltextIndex.FulltextResultRow row = convertToRow(doc, elasticsearchFacetProvider);
if (row != null) {
queue.add(row);
}
@@ -222,16 +236,17 @@ class ElasticsearchResultRowIterator imp
return new ElasticsearchSearcher(indexNode);
}
- private FulltextIndex.FulltextResultRow convertToRow(SearchHit hit) throws IOException {
+ private FulltextIndex.FulltextResultRow convertToRow(SearchHit hit,
+ ElasticsearchFacetProvider elasticsearchFacetProvider) throws IOException {
String id = hit.getId();
String path = idToPath(id);
if (path != null) {
if ("".equals(path)) {
path = "/";
}
- if (pr.isPathTransformed()) {
+ if (planResult.isPathTransformed()) {
String originalPath = path;
- path = pr.transformPath(path);
+ path = planResult.transformPath(path);
if (path == null) {
LOG.trace("Ignoring path {} : Transformation returned null", originalPath);
@@ -239,10 +254,10 @@ class ElasticsearchResultRowIterator imp
}
}
- boolean shouldIncludeForHierarchy = rowInclusionPredicate.shouldInclude(path, plan);
+ boolean shouldIncludeForHierarchy = rowInclusionPredicate.shouldInclude(path, this.plan);
LOG.trace("Matched path {}; shouldIncludeForHierarchy: {}", path, shouldIncludeForHierarchy);
return shouldIncludeForHierarchy ? new FulltextIndex.FulltextResultRow(path, hit.getScore(), null,
- null, null)
+ elasticsearchFacetProvider, null)
: null;
}
return null;
@@ -261,7 +276,7 @@ class ElasticsearchResultRowIterator imp
* @param planResult
* @return the Lucene query
*/
- static QueryBuilder getESRequest(IndexPlan plan, PlanResult planResult) {
+ static QueryBuilder getESQuery(IndexPlan plan, PlanResult planResult) {
List<QueryBuilder> qs = new ArrayList<>();
Filter filter = plan.getFilter();
FullTextExpression ft = filter.getFullTextConstraint();
@@ -722,4 +737,22 @@ class ElasticsearchResultRowIterator imp
private static String idToPath(String id) throws UnsupportedEncodingException {
return URLDecoder.decode(id, "UTF-8");
}
+
+    /**
+     * {@link FulltextIndex.FacetProvider} that obtains facets from an {@link ElasticsearchFacets}
+     * instance and keeps the most recently fetched result map for subsequent lookups.
+     */
+    class ElasticsearchFacetProvider implements FulltextIndex.FacetProvider {
+        private ElasticsearchFacets facetSource;
+        private Map<String, List<FulltextIndex.Facet>> facetsByProperty = new HashMap<>();
+
+        ElasticsearchFacetProvider(ElasticsearchFacets elasticsearchFacets) {
+            this.facetSource = elasticsearchFacets;
+        }
+
+        /**
+         * Returns the facets of the property referenced by {@code columnName}.
+         *
+         * @param numberOfFacets number of facets requested when the facets have to be fetched
+         * @param columnName     facet column, parsed with {@code FulltextIndex.parseFacetField}
+         * @return the facets held for the property, or {@code null} if there are none
+         * @throws IOException if fetching the facets fails
+         */
+        @Override
+        public List<FulltextIndex.Facet> getFacets(int numberOfFacets, String columnName) throws IOException {
+            final String property = FulltextIndex.parseFacetField(columnName);
+            final List<FulltextIndex.Facet> known = facetsByProperty.get(property);
+            if (known != null) {
+                return known;
+            }
+            // Nothing held for this property yet: fetch and replace the whole result map.
+            facetsByProperty = facetSource.getElasticSearchFacets(numberOfFacets);
+            return facetsByProperty.get(property);
+        }
+    }
}
Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java?rev=1877620&r1=1877619&r2=1877620&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java Tue May 12 05:40:54 2020
@@ -16,6 +16,7 @@
*/
package org.apache.jackrabbit.oak.plugins.index.elasticsearch.query;
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.SearchSourceBuilderUtil;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
@@ -26,13 +27,23 @@ import org.jetbrains.annotations.NotNull
import java.io.IOException;
-class ElasticsearchSearcher {
+public class ElasticsearchSearcher {
private final ElasticsearchIndexNode indexNode;
ElasticsearchSearcher(@NotNull ElasticsearchIndexNode indexNode) {
this.indexNode = indexNode;
}
+    /**
+     * Runs a search described by the given model against the remote index of this searcher's
+     * index node.
+     *
+     * @param elasticsearchSearcherModel the search parameters
+     * @return the response returned by the client
+     * @throws IOException if the request fails
+     */
+    public SearchResponse search(ElasticsearchSearcherModel elasticsearchSearcherModel) throws IOException {
+        SearchSourceBuilder source = SearchSourceBuilderUtil.createSearchSourceBuilder(elasticsearchSearcherModel);
+        String remoteIndexName = indexNode.getDefinition().getRemoteIndexName();
+
+        SearchRequest searchRequest = new SearchRequest(remoteIndexName);
+        searchRequest.source(source);
+
+        return indexNode.getConnection().getClient().search(searchRequest, RequestOptions.DEFAULT);
+    }
+
+ @Deprecated
public SearchResponse search(QueryBuilder query, int batchSize) throws IOException {
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
.query(query)
@@ -45,4 +56,8 @@ class ElasticsearchSearcher {
return indexNode.getConnection().getClient().search(request, RequestOptions.DEFAULT);
}
+
+ /**
+ * Returns this searcher instance itself.
+ *
+ * @return {@code this}
+ */
+ public ElasticsearchSearcher getElasticsearchSearcher(){
+ return this;
+ }
}
Added: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcherModel.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcherModel.java?rev=1877620&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcherModel.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcherModel.java Tue May 12 05:40:54 2020
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch.query;
+
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.search.aggregations.AggregationBuilder;
+import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
+
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Immutable description of a search request: the query, optional aggregations, paging
+ * ({@code from} / batch size) and which stored field / source to fetch. Instances are created
+ * through {@link ElasticsearchSearcherModelBuilder}.
+ */
+public class ElasticsearchSearcherModel {
+
+    private final QueryBuilder queryBuilder;
+    private final List<AggregationBuilder> aggregationBuilders;
+    private final int batchSize;
+    private final boolean fetchSource;
+    private final String storedField;
+    private final int from;
+
+    private ElasticsearchSearcherModel(QueryBuilder queryBuilder, List<AggregationBuilder> aggregationBuilders,
+                                       int batchSize, boolean fetchSource, String storedField, int from) {
+        this.queryBuilder = queryBuilder;
+        // Copy so that later changes to the builder's list do not leak into this model.
+        this.aggregationBuilders = new LinkedList<>(aggregationBuilders);
+        this.batchSize = batchSize;
+        this.fetchSource = fetchSource;
+        this.storedField = storedField;
+        this.from = from;
+    }
+
+    /** @return the number of hits to request */
+    public int getBatchSize() {
+        return batchSize;
+    }
+
+    /** @return the offset of the first hit to request */
+    public int getFrom() {
+        return from;
+    }
+
+    /** @return the query to execute */
+    public QueryBuilder getQueryBuilder() {
+        return queryBuilder;
+    }
+
+    /** @return the aggregations to compute along with the query */
+    public List<AggregationBuilder> getAggregationBuilders() {
+        return aggregationBuilders;
+    }
+
+    /** @return whether the document source should be fetched */
+    public boolean fetchSource() {
+        return fetchSource;
+    }
+
+    /** @return the name of the stored field to fetch */
+    public String getStoredField() {
+        return storedField;
+    }
+
+    /**
+     * Builder for {@link ElasticsearchSearcherModel}. Unless overridden, the source is not
+     * fetched, the stored field is {@code FieldNames.PATH}, no aggregations are requested and
+     * both {@code from} and the batch size are {@code 0}.
+     */
+    public static class ElasticsearchSearcherModelBuilder {
+        private QueryBuilder queryBuilder;
+        private final List<AggregationBuilder> aggregationBuilders = new LinkedList<>();
+        private int batchSize;
+        private boolean fetchSource = false;
+        private String storedField = FieldNames.PATH;
+        private int from;
+
+        /** Sets the query to execute. */
+        public ElasticsearchSearcherModelBuilder withQuery(QueryBuilder query) {
+            this.queryBuilder = query;
+            return this;
+        }
+
+        /** Adds the given aggregations to those already registered on this builder. */
+        public ElasticsearchSearcherModelBuilder withAggregation(List<TermsAggregationBuilder> aggregationBuilders) {
+            this.aggregationBuilders.addAll(aggregationBuilders);
+            return this;
+        }
+
+        /** Sets the number of hits to request. */
+        public ElasticsearchSearcherModelBuilder withBatchSize(int batchSize) {
+            this.batchSize = batchSize;
+            return this;
+        }
+
+        /** Sets the offset of the first hit to request. */
+        public ElasticsearchSearcherModelBuilder withFrom(int from) {
+            this.from = from;
+            return this;
+        }
+
+        /** Creates a model from the current state of this builder. */
+        public ElasticsearchSearcherModel build() {
+            return new ElasticsearchSearcherModel(queryBuilder, aggregationBuilders, batchSize, fetchSource, storedField, from);
+        }
+    }
+}
Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcherModel.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java?rev=1877620&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java Tue May 12 05:40:54 2020
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch.util;
+
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
+import org.apache.jackrabbit.oak.spi.query.Filter;
+import org.apache.jackrabbit.oak.spi.query.QueryConstants;
+import org.apache.jackrabbit.oak.spi.query.QueryIndex;
+import org.elasticsearch.search.aggregations.AggregationBuilders;
+import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
+
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Helper that turns the facet restrictions of a query plan into Elasticsearch terms
+ * aggregation builders.
+ */
+public final class ElasticsearchAggregationBuilderUtil {
+
+    private ElasticsearchAggregationBuilderUtil() {
+    }
+
+    /**
+     * Creates one terms aggregation for every {@code rep:facet} property restriction of the
+     * plan's filter. Each aggregation is named after the facet property, targets that property's
+     * {@code .keyword} field and is limited to {@code numberOfFacets} buckets.
+     *
+     * @param plan           the index plan whose filter is inspected
+     * @param numberOfFacets size applied to every aggregation
+     * @return the aggregation builders, empty if the filter has no facet restriction
+     */
+    public static List<TermsAggregationBuilder> getAggregators(QueryIndex.IndexPlan plan, int numberOfFacets) {
+        List<TermsAggregationBuilder> aggregators = new LinkedList<>();
+        for (Filter.PropertyRestriction restriction : plan.getFilter().getPropertyRestrictions()) {
+            if (!QueryConstants.REP_FACET.equals(restriction.propertyName)) {
+                continue;
+            }
+            String facetColumn = restriction.first.getValue(Type.STRING);
+            String facetField = FulltextIndex.parseFacetField(facetColumn);
+            TermsAggregationBuilder aggregator = AggregationBuilders.terms(facetField)
+                    .field(keywordFieldName(facetField))
+                    .size(numberOfFacets);
+            aggregators.add(aggregator);
+        }
+        return aggregators;
+    }
+
+    /** Appends the {@code .keyword} suffix to the given property name. */
+    private static String keywordFieldName(String propName) {
+        return propName + ".keyword";
+    }
+}
Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchConstants.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchConstants.java?rev=1877620&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchConstants.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchConstants.java Tue May 12 05:40:54 2020
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch.util;
+
+/**
+ * Constants shared by the Elasticsearch query classes.
+ */
+public final class ElasticsearchConstants {
+
+    private ElasticsearchConstants() {
+        // constants holder, not meant to be instantiated
+    }
+
+    /**
+     * Batch size for fetching results from queries.
+     */
+    public static final int ELASTICSEARCH_QUERY_BATCH_SIZE = 1000;
+
+    /**
+     * Upper bound applied when the query batch size is increased.
+     */
+    public static final int ELASTICSEARCH_QUERY_MAX_BATCH_SIZE = 10000;
+}
Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchConstants.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java?rev=1877620&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java (added)
+++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java Tue May 12 05:40:54 2020
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch.util;
+
+import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel;
+import org.elasticsearch.search.aggregations.AggregationBuilder;
+import org.elasticsearch.search.builder.SearchSourceBuilder;
+
+/**
+ * Static helper that turns an {@link ElasticsearchSearcherModel} into an
+ * Elasticsearch {@link SearchSourceBuilder}.
+ */
+public final class SearchSourceBuilderUtil {
+
+    private SearchSourceBuilderUtil() {
+        // static utility; not meant to be instantiated
+    }
+
+    /**
+     * Creates a new {@link SearchSourceBuilder} from the given model.
+     * The model's query builder, fetch-source setting, stored field, batch size
+     * (used as the result size) and from offset are copied onto the builder,
+     * and every aggregation builder held by the model is then registered on it.
+     *
+     * @param elasticsearchSearcherModel model describing the search; it is
+     *        dereferenced immediately, so passing {@code null} results in a
+     *        {@link NullPointerException}
+     * @return the newly created and populated search source builder
+     */
+    public static SearchSourceBuilder createSearchSourceBuilder(ElasticsearchSearcherModel elasticsearchSearcherModel) {
+        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
+        sourceBuilder.query(elasticsearchSearcherModel.getQueryBuilder());
+        sourceBuilder.fetchSource(elasticsearchSearcherModel.fetchSource());
+        sourceBuilder.storedField(elasticsearchSearcherModel.getStoredField());
+        sourceBuilder.size(elasticsearchSearcherModel.getBatchSize());
+        sourceBuilder.from(elasticsearchSearcherModel.getFrom());
+
+        // Aggregations are attached one by one after the basic search settings.
+        for (AggregationBuilder aggregation : elasticsearchSearcherModel.getAggregationBuilders()) {
+            sourceBuilder.aggregation(aggregation);
+        }
+        return sourceBuilder;
+    }
+}
Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java
------------------------------------------------------------------------------
svn:eol-style = native