You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/12/02 20:26:47 UTC

lucene-solr:branch_6x: LUCENE-7576: detect when special case automaton is passed to Terms.intersect

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x f1e402e39 -> b6072f3ae


LUCENE-7576: detect when special case automaton is passed to Terms.intersect


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/b6072f3a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/b6072f3a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/b6072f3a

Branch: refs/heads/branch_6x
Commit: b6072f3ae539a5fc45a2bb9f99441dfeef4e440a
Parents: f1e402e
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Dec 2 15:26:04 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Dec 2 15:26:32 2016 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                                |  4 ++++
 .../lucene/codecs/blocktree/FieldReader.java      |  3 +++
 .../src/java/org/apache/lucene/index/Terms.java   |  8 ++++++--
 .../org/apache/lucene/index/TestTermsEnum.java    | 18 ++++++++++++++++++
 4 files changed, 31 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b6072f3a/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 45ba4c1..b0a5f9c 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -30,6 +30,10 @@ Bug Fixes
 * LUCENE-7536: ASCIIFoldingFilterFactory used to return an illegal multi-term
   component when preserveOriginal was set to true. (Adrien Grand)
 
+* LUCENE-7576: Fix Terms.intersect in the default codec to detect when
+  the incoming automaton is a special case and throw a clearer
+  exception than NullPointerException (Tom Mortimer via Mike McCandless)
+
 Improvements
 
 * LUCENE-7532: Add back lost codec file format documentation

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b6072f3a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
index 1e92a43..05f0a12 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
@@ -182,6 +182,9 @@ public final class FieldReader extends Terms implements Accountable {
     //System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
     // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
     // can we optimize knowing that...?
+    if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
+      throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
+    }
     return new IntersectTermsEnum(this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm, compiled.sinkState);
   }
     

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b6072f3a/lucene/core/src/java/org/apache/lucene/index/Terms.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/Terms.java b/lucene/core/src/java/org/apache/lucene/index/Terms.java
index dd48ce9..7197e25 100644
--- a/lucene/core/src/java/org/apache/lucene/index/Terms.java
+++ b/lucene/core/src/java/org/apache/lucene/index/Terms.java
@@ -49,8 +49,12 @@ public abstract class Terms {
    *  provided <code>startTerm</code> must be accepted by
    *  the automaton.
    *
-   * <p><b>NOTE</b>: the returned TermsEnum cannot
-   * seek</p>.
+   *  <p>This is an expert low-level API and will only work
+   *  for {@code NORMAL} compiled automata.  To handle any
+   *  compiled automata you should use
+   *  {@link CompiledAutomaton#getTermsEnum} instead.
+   *
+   *  <p><b>NOTE</b>: the returned TermsEnum cannot seek.</p>
    *
    *  <p><b>NOTE</b>: the terms dictionary is free to
    *  return arbitrary terms as long as the resulted visited

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b6072f3a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
index cd72f41..ef3bf8b 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
@@ -993,4 +993,22 @@ public class TestTermsEnum extends LuceneTestCase {
     }
     dir.close();
   }
+
+  // LUCENE-7576
+  public void testIntersectRegexp() throws Exception {
+    Directory d = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), d);
+    Document doc = new Document();
+    doc.add(newStringField("field", "foobar", Field.Store.NO));
+    w.addDocument(doc);
+    IndexReader r = w.getReader();
+    Fields fields = MultiFields.getFields(r);
+    CompiledAutomaton automaton = new CompiledAutomaton(new RegExp("do_not_match_anything").toAutomaton());
+    Terms terms = fields.terms("field");
+    String message = expectThrows(IllegalArgumentException.class, () -> {terms.intersect(automaton, null);}).getMessage();
+    assertEquals("please use CompiledAutomaton.getTermsEnum instead", message);
+    r.close();
+    w.close();
+    d.close();
+  }
 }