You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@lucene.apache.org by GitBox <gi...@apache.org> on 2021/05/19 22:58:59 UTC
[GitHub] [lucene] jtibshirani commented on a change in pull request #144: LUCENE-9965: Add tooling to introspect query execution time

jtibshirani commented on a change in pull request #144:
URL: https://github.com/apache/lucene/pull/144#discussion_r635628013



##########
File path: lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/profile/TestProfileQuery.java
##########
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.sandbox.queries.profile;
+
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.LRUQueryCache;
+import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RandomApproximationQuery;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TotalHitCountCollector;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+import org.hamcrest.MatcherAssert;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+public class TestProfileQuery extends LuceneTestCase {

Review comment:
       Small comment: `TestProfileIndexSearcher` could be a clearer name.

##########
File path: lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/profile/TestProfileQuery.java
##########
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.sandbox.queries.profile;
+
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.LRUQueryCache;
+import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RandomApproximationQuery;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TotalHitCountCollector;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+import org.hamcrest.MatcherAssert;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+public class TestProfileQuery extends LuceneTestCase {
+
+  private static Directory dir;
+  private static IndexReader reader;
+  private static ProfileIndexSearcher searcher;
+
+  @BeforeClass
+  public static void setup() throws IOException {
+    dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    final int numDocs = TestUtil.nextInt(random(), 1, 20);
+    for (int i = 0; i < numDocs; ++i) {
+      final int numHoles = random().nextInt(5);
+      for (int j = 0; j < numHoles; ++j) {
+        w.addDocument(new Document());
+      }
+      Document doc = new Document();
+      doc.add(new StringField("foo", "bar", Store.NO));
+      w.addDocument(doc);
+    }
+    reader = w.getReader();
+    w.close();
+    searcher = new ProfileIndexSearcher(reader);
+  }
+
+  @After
+  public void checkNoCache() {
+    LRUQueryCache cache = (LRUQueryCache) searcher.getQueryCache();
+    MatcherAssert.assertThat(cache.getHitCount(), equalTo(0L));
+    MatcherAssert.assertThat(cache.getCacheCount(), equalTo(0L));
+    MatcherAssert.assertThat(cache.getTotalCount(), equalTo(cache.getMissCount()));
+    MatcherAssert.assertThat(cache.getCacheSize(), equalTo(0L));
+  }
+
+  @AfterClass
+  public static void cleanup() throws IOException {
+    IOUtils.close(reader, dir);
+    dir = null;
+    reader = null;
+    searcher = null;
+  }
+
+  public void testBasic() throws IOException {
+    QueryProfiler profiler = new QueryProfiler();
+    searcher.setProfiler(profiler);
+    Query query = new TermQuery(new Term("foo", "bar"));
+    searcher.search(query, 1);
+    List<ProfileResult> results = profiler.getTree();
+    assertEquals(1, results.size());
+    Map<String, Long> breakdown = results.get(0).getTimeBreakdown();
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.CREATE_WEIGHT.toString()), greaterThan(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.BUILD_SCORER.toString()), greaterThan(0L));
+    MatcherAssert.assertThat(breakdown.get(QueryTimingType.NEXT_DOC.toString()), greaterThan(0L));
+    MatcherAssert.assertThat(breakdown.get(QueryTimingType.ADVANCE.toString()), equalTo(0L));
+    MatcherAssert.assertThat(breakdown.get(QueryTimingType.SCORE.toString()), greaterThan(0L));
+    MatcherAssert.assertThat(breakdown.get(QueryTimingType.MATCH.toString()), equalTo(0L));
+
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.CREATE_WEIGHT.toString() + "_count"), greaterThan(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.BUILD_SCORER.toString() + "_count"), greaterThan(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.NEXT_DOC.toString() + "_count"), greaterThan(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.ADVANCE.toString() + "_count"), equalTo(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.SCORE.toString() + "_count"), greaterThan(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.MATCH.toString() + "_count"), equalTo(0L));
+
+    long rewriteTime = profiler.getRewriteTime();
+    MatcherAssert.assertThat(rewriteTime, greaterThan(0L));
+  }
+
+  public void testNoScoring() throws IOException {
+    QueryProfiler profiler = new QueryProfiler();
+    searcher.setProfiler(profiler);
+    Query query = new TermQuery(new Term("foo", "bar"));
+    searcher.search(query, 1, Sort.INDEXORDER); // scores are not needed
+    List<ProfileResult> results = profiler.getTree();
+    assertEquals(1, results.size());
+    Map<String, Long> breakdown = results.get(0).getTimeBreakdown();
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.CREATE_WEIGHT.toString()), greaterThan(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.BUILD_SCORER.toString()), greaterThan(0L));
+    MatcherAssert.assertThat(breakdown.get(QueryTimingType.NEXT_DOC.toString()), greaterThan(0L));
+    MatcherAssert.assertThat(breakdown.get(QueryTimingType.ADVANCE.toString()), equalTo(0L));
+    MatcherAssert.assertThat(breakdown.get(QueryTimingType.SCORE.toString()), equalTo(0L));
+    MatcherAssert.assertThat(breakdown.get(QueryTimingType.MATCH.toString()), equalTo(0L));
+
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.CREATE_WEIGHT.toString() + "_count"), greaterThan(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.BUILD_SCORER.toString() + "_count"), greaterThan(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.NEXT_DOC.toString() + "_count"), greaterThan(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.ADVANCE.toString() + "_count"), equalTo(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.SCORE.toString() + "_count"), equalTo(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.MATCH.toString() + "_count"), equalTo(0L));
+
+    long rewriteTime = profiler.getRewriteTime();
+    MatcherAssert.assertThat(rewriteTime, greaterThan(0L));
+  }
+
+  public void testUseIndexStats() throws IOException {
+    QueryProfiler profiler = new QueryProfiler();
+    searcher.setProfiler(profiler);
+    Query query = new TermQuery(new Term("foo", "bar"));
+    searcher.count(query); // will use index stats
+    List<ProfileResult> results = profiler.getTree();
+    assertEquals(0, results.size());
+
+    long rewriteTime = profiler.getRewriteTime();
+    MatcherAssert.assertThat(rewriteTime, greaterThan(0L));
+  }
+
+  public void testApproximations() throws IOException {
+    QueryProfiler profiler = new QueryProfiler();
+    searcher.setProfiler(profiler);
+    Query query = new RandomApproximationQuery(new TermQuery(new Term("foo", "bar")), random());
+    searcher.count(query);
+    List<ProfileResult> results = profiler.getTree();
+    assertEquals(1, results.size());
+    Map<String, Long> breakdown = results.get(0).getTimeBreakdown();
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.CREATE_WEIGHT.toString()), greaterThan(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.BUILD_SCORER.toString()), greaterThan(0L));
+    MatcherAssert.assertThat(breakdown.get(QueryTimingType.NEXT_DOC.toString()), greaterThan(0L));
+    MatcherAssert.assertThat(breakdown.get(QueryTimingType.ADVANCE.toString()), equalTo(0L));
+    MatcherAssert.assertThat(breakdown.get(QueryTimingType.SCORE.toString()), equalTo(0L));
+    MatcherAssert.assertThat(breakdown.get(QueryTimingType.MATCH.toString()), greaterThan(0L));
+
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.CREATE_WEIGHT.toString() + "_count"), greaterThan(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.BUILD_SCORER.toString() + "_count"), greaterThan(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.NEXT_DOC.toString() + "_count"), greaterThan(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.ADVANCE.toString() + "_count"), equalTo(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.SCORE.toString() + "_count"), equalTo(0L));
+    MatcherAssert.assertThat(
+        breakdown.get(QueryTimingType.MATCH.toString() + "_count"), greaterThan(0L));
+
+    long rewriteTime = profiler.getRewriteTime();
+    MatcherAssert.assertThat(rewriteTime, greaterThan(0L));
+  }
+
+  public void testCollector() throws IOException {

Review comment:
       It'd be nice to have one test showing how the collector would actually be used in a search, through something like `IndexSearcher#search(Query query, Collector results)`.

##########
File path: lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/profile/ProfileIndexSearcher.java
##########
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.sandbox.queries.profile;
+
+import java.io.IOException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Weight;
+
+/**
+ * A simple extension of {@link IndexSearcher} to add a {@link QueryProfiler} that can be set to
+ * test query timings.
+ */
+public class ProfileIndexSearcher extends IndexSearcher {
+
+  private QueryProfiler profiler;
+
+  public ProfileIndexSearcher(IndexReader reader) {
+    super(reader);
+  }
+
+  public void setProfiler(QueryProfiler profiler) {

Review comment:
       Maybe we could add `QueryProfiler` as a constructor parameter and ensure it's always non-null. That'd simplify this class a bit, and make it clear that it's always meant to be used for profiling.
   
   We could even take this further and remove the `QueryProfiler` class, folding its logic into `ProfileIndexSearcher`. `QueryProfiler` doesn't seem like a helpful abstraction on its own.

##########
File path: lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/profile/AbstractInternalProfileTree.java
##########
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.sandbox.queries.profile;
+
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Deque;
+import java.util.List;
+
+/**
+ * This class tracks a Query tree for profiling. This class can be extended to allow different
+ * element types for timing with the tree.
+ */
+public abstract class AbstractInternalProfileTree<PB extends AbstractProfileBreakdown<?>, E> {

Review comment:
       +1 to simplifying the class hierarchy as much as possible. That may mean Elasticsearch can't reuse these classes in place of its original ones, but that seems okay! We could also remove the name `Internal` from these classes to simplify (so we just have `QueryProfileTree` and `ProfileCollector`).




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@lucene.apache.org
For additional commands, e-mail: issues-help@lucene.apache.org