You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2017/02/22 10:16:59 UTC
lucene-solr:master: LUCENE-7698: fix CommonGramsQueryFilter to not
produce a disconnected token graph
Repository: lucene-solr
Updated Branches:
refs/heads/master 14b362260 -> b9c9cddff
LUCENE-7698: fix CommonGramsQueryFilter to not produce a disconnected token graph
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/b9c9cddf
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/b9c9cddf
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/b9c9cddf
Branch: refs/heads/master
Commit: b9c9cddff7cef08e8b0433a203771e48e662e7b1
Parents: 14b3622
Author: Mike McCandless <mi...@apache.org>
Authored: Wed Feb 22 05:16:47 2017 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Wed Feb 22 05:16:47 2017 -0500
----------------------------------------------------------------------
lucene/CHANGES.txt | 4 ++++
.../commongrams/CommonGramsQueryFilter.java | 6 ++++++
.../TestCommonGramsQueryFilterFactory.java | 10 ++++++++++
.../java/org/apache/lucene/util/QueryBuilder.java | 17 ++++++++++++++++-
.../queryparser/classic/TestQueryParser.java | 18 ++++++++++++++++++
5 files changed, 54 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b9c9cddf/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index a3028ac..dda4a45 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -142,6 +142,10 @@ Bug Fixes
rewritten child query in their equals and hashCode implementations.
(Adrien Grand)
+* LUCENE-7698: CommonGramsQueryFilter was producing a disconnected
+ token graph, messing up phrase queries when it was used during query
+ parsing (Ere Maijala via Mike McCandless)
+
Improvements
* LUCENE-7055: Added Weight#scorerSupplier, which allows to estimate the cost
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b9c9cddf/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java
index eeaec84..9307e7b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import static org.apache.lucene.analysis.commongrams.CommonGramsFilter.GRAM_TYPE;
@@ -46,6 +47,7 @@ public final class CommonGramsQueryFilter extends TokenFilter {
private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
+ private final PositionLengthAttribute posLengthAttribute = addAttribute(PositionLengthAttribute.class);
private State previous;
private String previousType;
@@ -91,6 +93,8 @@ public final class CommonGramsQueryFilter extends TokenFilter {
if (isGramType()) {
posIncAttribute.setPositionIncrement(1);
+ // We must set this back to 1 (from e.g. 2 or higher) otherwise the token graph is disconnected:
+ posLengthAttribute.setPositionLength(1);
}
return true;
}
@@ -109,6 +113,8 @@ public final class CommonGramsQueryFilter extends TokenFilter {
if (isGramType()) {
posIncAttribute.setPositionIncrement(1);
+ // We must set this back to 1 (from e.g. 2 or higher) otherwise the token graph is disconnected:
+ posLengthAttribute.setPositionLength(1);
}
return true;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b9c9cddf/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
index 23d1bd4..fee6e88 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
@@ -92,4 +92,14 @@ public class TestCommonGramsQueryFilterFactory extends BaseTokenStreamFactoryTes
});
assertTrue(expected.getMessage().contains("Unknown parameters"));
}
+
+ public void testCompleteGraph() throws Exception {
+ CommonGramsQueryFilterFactory factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery");
+ CharArraySet words = factory.getCommonWords();
+ assertTrue("words is null and it shouldn't be", words != null);
+ assertTrue(words.contains("the"));
+ Tokenizer tokenizer = whitespaceMockTokenizer("testing the factory works");
+ TokenStream stream = factory.create(tokenizer);
+ assertGraphStrings(stream, "testing_the the_factory factory works");
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b9c9cddf/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java
index b8d8c29..c399842 100644
--- a/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/QueryBuilder.java
@@ -57,6 +57,7 @@ import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings;
public class QueryBuilder {
protected Analyzer analyzer;
protected boolean enablePositionIncrements = true;
+ protected boolean enableGraphQueries = true;
protected boolean autoGenerateMultiTermSynonymsPhraseQuery = false;
/** Creates a new QueryBuilder using the given analyzer. */
@@ -240,6 +241,20 @@ public class QueryBuilder {
}
}
+ /** Enable or disable graph TokenStream processing (enabled by default).
+ *
+ * @lucene.experimental */
+ public void setEnableGraphQueries(boolean v) {
+ enableGraphQueries = v;
+ }
+
+ /** Returns true if graph TokenStream processing is enabled (default).
+ *
+ * @lucene.experimental */
+ public boolean getEnableGraphQueries() {
+ return enableGraphQueries;
+ }
+
/**
* Creates a query from a token stream.
*
@@ -282,7 +297,7 @@ public class QueryBuilder {
}
int positionLength = posLenAtt.getPositionLength();
- if (!isGraph && positionLength > 1) {
+ if (enableGraphQueries && positionLength > 1) {
isGraph = true;
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b9c9cddf/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
index 1d7a0f6..e83b62a 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
@@ -533,6 +533,24 @@ public class TestQueryParser extends QueryParserTestBase {
assertEquals(guineaPigPhrase, smart.parse("\"guinea pig\""));
}
+ public void testEnableGraphQueries() throws Exception {
+ QueryParser dumb = new QueryParser("field", new Analyzer1());
+ dumb.setSplitOnWhitespace(false);
+ dumb.setEnableGraphQueries(false);
+
+ TermQuery guinea = new TermQuery(new Term("field", "guinea"));
+ TermQuery pig = new TermQuery(new Term("field", "pig"));
+ TermQuery cavy = new TermQuery(new Term("field", "cavy"));
+
+ // A multi-word synonym source will just form a boolean query when graph queries are disabled:
+ Query inner = new SynonymQuery(new Term[] {new Term("field", "cavy"), new Term("field", "guinea")});
+ BooleanQuery.Builder b = new BooleanQuery.Builder();
+ b.add(inner, BooleanClause.Occur.SHOULD);
+ b.add(pig, BooleanClause.Occur.SHOULD);
+ BooleanQuery query = b.build();
+ assertEquals(query, dumb.parse("guinea pig"));
+ }
+
// TODO: Move to QueryParserTestBase once standard flexible parser gets this capability
public void testOperatorsAndMultiWordSynonyms() throws Exception {
Analyzer a = new MockSynonymAnalyzer();