You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by kr...@apache.org on 2016/12/15 21:35:18 UTC

[03/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7576: detect when special case automaton is passed to Terms.intersect

LUCENE-7576: detect when special case automaton is passed to Terms.intersect


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/fcccd317
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/fcccd317
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/fcccd317

Branch: refs/heads/jira/solr-8593
Commit: fcccd317ddb44a742a0b3265fcf32923649f38cd
Parents: c61268f
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Dec 2 15:26:04 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Dec 2 15:26:04 2016 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                                |  4 ++++
 .../lucene/codecs/blocktree/FieldReader.java      |  3 +++
 .../src/java/org/apache/lucene/index/Terms.java   |  8 ++++++--
 .../org/apache/lucene/index/TestTermsEnum.java    | 18 ++++++++++++++++++
 4 files changed, 31 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fcccd317/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e62a99d..4afc507 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -81,6 +81,10 @@ Bug Fixes
 * LUCENE-7536: ASCIIFoldingFilterFactory used to return an illegal multi-term
   component when preserveOriginal was set to true. (Adrien Grand)
 
+* LUCENE-7576: Fix Terms.intersect in the default codec to detect when
+  the incoming automaton is a special case and throw a clearer
+  exception than NullPointerException (Tom Mortimer via Mike McCandless)
+
 Improvements
 
 * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fcccd317/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
index 7f13a32..4ee3826 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
@@ -182,6 +182,9 @@ public final class FieldReader extends Terms implements Accountable {
     //System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
     // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
     // can we optimize knowing that...?
+    if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
+      throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
+    }
     return new IntersectTermsEnum(this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm, compiled.sinkState);
   }
     

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fcccd317/lucene/core/src/java/org/apache/lucene/index/Terms.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/Terms.java b/lucene/core/src/java/org/apache/lucene/index/Terms.java
index dd48ce9..7197e25 100644
--- a/lucene/core/src/java/org/apache/lucene/index/Terms.java
+++ b/lucene/core/src/java/org/apache/lucene/index/Terms.java
@@ -49,8 +49,12 @@ public abstract class Terms {
    *  provided <code>startTerm</code> must be accepted by
    *  the automaton.
    *
-   * <p><b>NOTE</b>: the returned TermsEnum cannot
-   * seek</p>.
+   *  <p>This is an expert low-level API and will only work
+   *  for {@code NORMAL} compiled automata.  To handle any
+   *  compiled automaton you should use
+   *  {@link CompiledAutomaton#getTermsEnum} instead.
+   *
+   *  <p><b>NOTE</b>: the returned TermsEnum cannot seek.</p>
    *
    *  <p><b>NOTE</b>: the terms dictionary is free to
    *  return arbitrary terms as long as the resulted visited

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fcccd317/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
index 3f15381..a388d42 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
@@ -998,4 +998,22 @@ public class TestTermsEnum extends LuceneTestCase {
     }
     dir.close();
   }
+
+  // LUCENE-7576
+  public void testIntersectRegexp() throws Exception {
+    Directory d = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), d);
+    Document doc = new Document();
+    doc.add(newStringField("field", "foobar", Field.Store.NO));
+    w.addDocument(doc);
+    IndexReader r = w.getReader();
+    Fields fields = MultiFields.getFields(r);
+    CompiledAutomaton automaton = new CompiledAutomaton(new RegExp("do_not_match_anything").toAutomaton());
+    Terms terms = fields.terms("field");
+    String message = expectThrows(IllegalArgumentException.class, () -> {terms.intersect(automaton, null);}).getMessage();
+    assertEquals("please use CompiledAutomaton.getTermsEnum instead", message);
+    r.close();
+    w.close();
+    d.close();
+  }
 }