You are viewing a plain text version of this content. The canonical link for it was provided as a hyperlink in the original HTML version of this archived message (link not preserved in this plain-text rendering).
Posted to commits@lucene.apache.org by gs...@apache.org on 2021/08/26 14:01:34 UTC
[lucene-solr] branch branch_8x updated: LUCENE-10060: Ensure
DrillSidewaysQuery instances never get cached (#2561)
This is an automated email from the ASF dual-hosted git repository.
gsmiller pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new 1ae34dd LUCENE-10060: Ensure DrillSidewaysQuery instances never get cached (#2561)
1ae34dd is described below
commit 1ae34dd129d791f61d58202e2d993ab2055ad1d5
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Thu Aug 26 07:01:16 2021 -0700
LUCENE-10060: Ensure DrillSidewaysQuery instances never get cached (#2561)
---
lucene/CHANGES.txt | 2 +
.../org/apache/lucene/facet/DrillSideways.java | 13 +--
.../apache/lucene/facet/DrillSidewaysQuery.java | 17 ++-
.../org/apache/lucene/facet/TestDrillSideways.java | 121 ++++++++++++++++++++-
4 files changed, 131 insertions(+), 22 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 24bda76..db8f924 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -113,6 +113,8 @@ Bug Fixes
* LUCENE-9963: FlattenGraphFilter is now more robust when handling
incoming holes in the input token graph (Geoff Lawson)
+* LUCENE-10060: Ensure DrillSidewaysQuery instances never get cached. (Greg Miller, Zachary Chen)
+
Other
---------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java
index b208726..11920a97 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java
@@ -35,14 +35,12 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.FieldDoc;
-import org.apache.lucene.search.FilterCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.MultiCollectorManager;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
@@ -246,16 +244,7 @@ public class DrillSideways {
drillSidewaysFacetsCollectorManagers,
drillDownQueries,
scoreSubDocsAtOnce());
- if (hitCollector.scoreMode().needsScores() == false) {
- // this is a horrible hack in order to make sure IndexSearcher will not
- // attempt to cache the DrillSidewaysQuery
- hitCollector = new FilterCollector(hitCollector) {
- @Override
- public ScoreMode scoreMode() {
- return ScoreMode.COMPLETE;
- }
- };
- }
+
searcher.search(dsq, hitCollector);
FacetsCollector drillDownCollector;
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java
index 359cd81..814f7e3 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java
@@ -160,15 +160,14 @@ class DrillSidewaysQuery extends Query {
@Override
public boolean isCacheable(LeafReaderContext ctx) {
- if (baseWeight.isCacheable(ctx) == false) {
- return false;
- }
- for (Weight w : drillDowns) {
- if (w.isCacheable(ctx) == false) {
- return false;
- }
- }
- return true;
+ // We can never cache DSQ instances. It's critical that the BulkScorer produced by this
+ // Weight runs through the "normal" execution path so that it has access to an
+ // "acceptDocs" instance that accurately reflects deleted docs. During caching,
+ // "acceptDocs" is null so that caching over-matches (since the final BulkScorer would
+ // account for deleted docs). The problem is that this BulkScorer has a side-effect of
+ // populating the "sideways" FacetsCollectors, so it will use deleted docs in its
+ // sideways counting if caching kicks in. See LUCENE-10060:
+ return false;
}
@Override
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
index 74e87fb..75b01a4 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
@@ -61,6 +61,7 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryCachingPolicy;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreDoc;
@@ -132,6 +133,124 @@ public class TestDrillSideways extends FacetTestCase {
return newSearcher(reader, true, false, random().nextBoolean());
}
+ // See LUCENE-10060:
+ public void testNoCaching() throws Exception {
+ Directory dir = newDirectory();
+ Directory taxoDir = newDirectory();
+
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+ DirectoryTaxonomyWriter taxoWriter =
+ new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
+
+ FacetsConfig config = new FacetsConfig();
+
+ // Setup some basic test documents:
+ Document doc = new Document();
+ doc.add(new StringField("id", "1", Field.Store.NO));
+ doc.add(new SortedDocValuesField("id", new BytesRef("1")));
+ doc.add(new FacetField("Color", "Red"));
+ doc.add(new FacetField("Size", "Small"));
+ writer.addDocument(config.build(taxoWriter, doc));
+
+ doc = new Document();
+ doc.add(new StringField("id", "2", Field.Store.NO));
+ doc.add(new SortedDocValuesField("id", new BytesRef("2")));
+ doc.add(new FacetField("Color", "Green"));
+ doc.add(new FacetField("Size", "Small"));
+ writer.addDocument(config.build(taxoWriter, doc));
+
+ doc = new Document();
+ doc.add(new StringField("id", "3", Field.Store.NO));
+ doc.add(new SortedDocValuesField("id", new BytesRef("3")));
+ doc.add(new FacetField("Color", "Blue"));
+ doc.add(new FacetField("Size", "Small"));
+ writer.addDocument(config.build(taxoWriter, doc));
+
+ doc = new Document();
+ doc.add(new StringField("id", "4", Field.Store.NO));
+ doc.add(new SortedDocValuesField("id", new BytesRef("4")));
+ doc.add(new FacetField("Color", "Red"));
+ doc.add(new FacetField("Size", "Medium"));
+ writer.addDocument(config.build(taxoWriter, doc));
+
+ doc = new Document();
+ doc.add(new StringField("id", "5", Field.Store.NO));
+ doc.add(new SortedDocValuesField("id", new BytesRef("5")));
+ doc.add(new FacetField("Color", "Blue"));
+ doc.add(new FacetField("Size", "Medium"));
+ writer.addDocument(config.build(taxoWriter, doc));
+
+ doc = new Document();
+ doc.add(new StringField("id", "6", Field.Store.NO));
+ doc.add(new SortedDocValuesField("id", new BytesRef("6")));
+ doc.add(new FacetField("Color", "Blue"));
+ doc.add(new FacetField("Size", "Large"));
+ writer.addDocument(config.build(taxoWriter, doc));
+
+ // Delete a couple documents. We'll want to make sure they don't get counted in binning:
+ writer.deleteDocuments(new Term("id", "4"));
+ writer.deleteDocuments(new Term("id", "6"));
+
+ // Simple DDQ that just filters all results by Color == Blue:
+ DrillDownQuery ddq = new DrillDownQuery(config);
+ ddq.add("Color", "Blue");
+
+ // Setup an IndexSearcher that will try to cache queries aggressively:
+ IndexSearcher searcher = getNewSearcher(writer.getReader());
+ searcher.setQueryCachingPolicy(
+ new QueryCachingPolicy() {
+ @Override
+ public void onUse(Query query) {}
+
+ @Override
+ public boolean shouldCache(Query query) {
+ return true;
+ }
+ });
+
+ // Setup a DS instance for searching:
+ TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+ DrillSideways ds = getNewDrillSideways(searcher, config, taxoReader);
+
+ // We'll use a CollectorManager to trigger the trickiest caching behavior:
+ SimpleCollectorManager collectorManager =
+ new SimpleCollectorManager(10, Comparator.comparing(cr -> cr.id));
+ // Make sure our CM produces Collectors that _do not_ need scores to ensure IndexSearcher tries
+ // to cache:
+ assertFalse(collectorManager.newCollector().scoreMode().needsScores());
+ // If we incorrectly cache here, the "sideways" FacetsCollectors will get populated with counts
+ // for the deleted
+ // docs. Make sure they don't:
+ DrillSideways.ConcurrentDrillSidewaysResult<List<DocAndScore>> concurrentResult =
+ ds.search(ddq, collectorManager);
+ assertEquals(2, concurrentResult.collectorResult.size());
+ assertEquals(
+ "dim=Color path=[] value=4 childCount=3\n Blue (2)\n Red (1)\n Green (1)\n",
+ concurrentResult.facets.getTopChildren(10, "Color").toString());
+ assertEquals(
+ "dim=Size path=[] value=2 childCount=2\n Small (1)\n Medium (1)\n",
+ concurrentResult.facets.getTopChildren(10, "Size").toString());
+
+ // Now do the same thing but use a Collector directly:
+ SimpleCollector collector = new SimpleCollector();
+ // Make sure our Collector _does not_ need scores to ensure IndexSearcher tries to cache:
+ assertFalse(collector.scoreMode().needsScores());
+ // If we incorrectly cache here, the "sideways" FacetsCollectors will get populated with counts
+ // for the deleted
+ // docs. Make sure they don't:
+ DrillSidewaysResult result = ds.search(ddq, collector);
+ assertEquals(2, collector.hits.size());
+ assertEquals(
+ "dim=Color path=[] value=4 childCount=3\n Blue (2)\n Red (1)\n Green (1)\n",
+ result.facets.getTopChildren(10, "Color").toString());
+ assertEquals(
+ "dim=Size path=[] value=2 childCount=2\n Small (1)\n Medium (1)\n",
+ result.facets.getTopChildren(10, "Size").toString());
+
+ writer.close();
+ IOUtils.close(searcher.getIndexReader(), taxoReader, taxoWriter, dir, taxoDir);
+ }
+
public void testBasic() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
@@ -1258,7 +1377,7 @@ public class TestDrillSideways extends FacetTestCase {
@Override
public ScoreMode scoreMode() {
- return ScoreMode.COMPLETE;
+ return ScoreMode.COMPLETE_NO_SCORES;
}
}