You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by kr...@apache.org on 2016/12/15 21:35:18 UTC
[03/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7576: detect when
special case automaton is passed to Terms.intersect
LUCENE-7576: detect when special case automaton is passed to Terms.intersect
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/fcccd317
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/fcccd317
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/fcccd317
Branch: refs/heads/jira/solr-8593
Commit: fcccd317ddb44a742a0b3265fcf32923649f38cd
Parents: c61268f
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Dec 2 15:26:04 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Dec 2 15:26:04 2016 -0500
----------------------------------------------------------------------
lucene/CHANGES.txt | 4 ++++
.../lucene/codecs/blocktree/FieldReader.java | 3 +++
.../src/java/org/apache/lucene/index/Terms.java | 8 ++++++--
.../org/apache/lucene/index/TestTermsEnum.java | 18 ++++++++++++++++++
4 files changed, 31 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fcccd317/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e62a99d..4afc507 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -81,6 +81,10 @@ Bug Fixes
* LUCENE-7536: ASCIIFoldingFilterFactory used to return an illegal multi-term
component when preserveOriginal was set to true. (Adrien Grand)
+* LUCENE-7576: Fix Terms.intersect in the default codec to detect when
+ the incoming automaton is a special case and throw a clearer
+ exception than NullPointerException (Tom Mortimer via Mike McCandless)
+
Improvements
* LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fcccd317/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
index 7f13a32..4ee3826 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
@@ -182,6 +182,9 @@ public final class FieldReader extends Terms implements Accountable {
//System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
// TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
// can we optimize knowing that...?
+ if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
+ throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
+ }
return new IntersectTermsEnum(this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm, compiled.sinkState);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fcccd317/lucene/core/src/java/org/apache/lucene/index/Terms.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/Terms.java b/lucene/core/src/java/org/apache/lucene/index/Terms.java
index dd48ce9..7197e25 100644
--- a/lucene/core/src/java/org/apache/lucene/index/Terms.java
+++ b/lucene/core/src/java/org/apache/lucene/index/Terms.java
@@ -49,8 +49,12 @@ public abstract class Terms {
* provided <code>startTerm</code> must be accepted by
* the automaton.
*
- * <p><b>NOTE</b>: the returned TermsEnum cannot
- * seek</p>.
+ * <p>This is an expert low-level API and will only work
+ * for {@code NORMAL} compiled automata. To handle any
+ * compiled automaton you should instead use
+ * {@link CompiledAutomaton#getTermsEnum}.
+ *
+ * <p><b>NOTE</b>: the returned TermsEnum cannot seek.</p>
*
* <p><b>NOTE</b>: the terms dictionary is free to
* return arbitrary terms as long as the resulted visited
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fcccd317/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
index 3f15381..a388d42 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
@@ -998,4 +998,22 @@ public class TestTermsEnum extends LuceneTestCase {
}
dir.close();
}
+
+ // LUCENE-7576: intersect should reject a special-case automaton with a clear IllegalArgumentException rather than an NPE
+ public void testIntersectRegexp() throws Exception {
+ Directory d = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), d);
+ Document doc = new Document();
+ doc.add(newStringField("field", "foobar", Field.Store.NO));
+ w.addDocument(doc);
+ IndexReader r = w.getReader();
+ Fields fields = MultiFields.getFields(r);
+ CompiledAutomaton automaton = new CompiledAutomaton(new RegExp("do_not_match_anything").toAutomaton());
+ Terms terms = fields.terms("field");
+ String message = expectThrows(IllegalArgumentException.class, () -> {terms.intersect(automaton, null);}).getMessage();
+ assertEquals("please use CompiledAutomaton.getTermsEnum instead", message);
+ r.close();
+ w.close();
+ d.close();
+ }
}