You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2013/05/16 15:47:17 UTC
svn commit: r1483364 - in /lucene/dev/branches/lucene_solr_4_3: ./ lucene/
lucene/queryparser/
lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/
lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/
lucene/queryparser/src/...
Author: shalin
Date: Thu May 16 13:47:17 2013
New Revision: 1483364
URL: http://svn.apache.org/r1483364
Log:
LUCENE-4991: QueryParser doesn't handle synonyms correctly for Chinese
Modified:
lucene/dev/branches/lucene_solr_4_3/ (props changed)
lucene/dev/branches/lucene_solr_4_3/lucene/ (props changed)
lucene/dev/branches/lucene_solr_4_3/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/ (props changed)
lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
Modified: lucene/dev/branches/lucene_solr_4_3/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/lucene/CHANGES.txt?rev=1483364&r1=1483363&r2=1483364&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_4_3/lucene/CHANGES.txt Thu May 16 13:47:17 2013
@@ -38,6 +38,11 @@ Bug Fixes
fails to reflect a delete from IndexWriter.tryDeleteDocument (Reg,
Mike McCandless)
+* LUCENE-4991: Fix handling of synonyms in classic QueryParser.getFieldQuery for
+ terms not separated by whitespace. PositionIncrementAttribute was ignored, so with
+ default AND synonyms wrongly became mandatory clauses, and with OR, the
+ coordination factor was wrong. (李威, Robert Muir)
+
======================= Lucene 4.3.0 =======================
Changes in backwards compatibility policy
Modified: lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java?rev=1483364&r1=1483363&r2=1483364&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java (original)
+++ lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java Thu May 16 13:47:17 2013
@@ -576,24 +576,53 @@ public abstract class QueryParserBase im
if (severalTokensAtSamePosition || (!quoted && !autoGeneratePhraseQueries)) {
if (positionCount == 1 || (!quoted && !autoGeneratePhraseQueries)) {
// no phrase query:
- BooleanQuery q = newBooleanQuery(positionCount == 1);
-
- BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR ?
- BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
-
- for (int i = 0; i < numTokens; i++) {
- try {
- boolean hasNext = buffer.incrementToken();
- assert hasNext == true;
- termAtt.fillBytesRef();
- } catch (IOException e) {
- // safe to ignore, because we know the number of tokens
+
+ if (positionCount == 1) {
+ // simple case: only one position, with synonyms
+ BooleanQuery q = newBooleanQuery(true);
+ for (int i = 0; i < numTokens; i++) {
+ try {
+ boolean hasNext = buffer.incrementToken();
+ assert hasNext == true;
+ termAtt.fillBytesRef();
+ } catch (IOException e) {
+ // safe to ignore, because we know the number of tokens
+ }
+ Query currentQuery = newTermQuery(
+ new Term(field, BytesRef.deepCopyOf(bytes)));
+ q.add(currentQuery, BooleanClause.Occur.SHOULD);
+ }
+ return q;
+ } else {
+ // multiple positions
+ BooleanQuery q = newBooleanQuery(false);
+ final BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
+ Query currentQuery = null;
+ for (int i = 0; i < numTokens; i++) {
+ try {
+ boolean hasNext = buffer.incrementToken();
+ assert hasNext == true;
+ termAtt.fillBytesRef();
+ } catch (IOException e) {
+ // safe to ignore, because we know the number of tokens
+ }
+ if (posIncrAtt != null && posIncrAtt.getPositionIncrement() == 0) {
+ if (!(currentQuery instanceof BooleanQuery)) {
+ Query t = currentQuery;
+ currentQuery = newBooleanQuery(true);
+ ((BooleanQuery)currentQuery).add(t, BooleanClause.Occur.SHOULD);
+ }
+ ((BooleanQuery)currentQuery).add(newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes))), BooleanClause.Occur.SHOULD);
+ } else {
+ if (currentQuery != null) {
+ q.add(currentQuery, occur);
+ }
+ currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
+ }
}
- Query currentQuery = newTermQuery(
- new Term(field, BytesRef.deepCopyOf(bytes)));
q.add(currentQuery, occur);
+ return q;
}
- return q;
}
else {
// phrase query:
Modified: lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java?rev=1483364&r1=1483363&r2=1483364&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java (original)
+++ lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java Thu May 16 13:47:17 2013
@@ -17,9 +17,17 @@ package org.apache.lucene.queryparser.cl
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.Reader;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
@@ -27,6 +35,7 @@ import org.apache.lucene.queryparser.fle
import org.apache.lucene.queryparser.util.QueryParserTestBase;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
@@ -307,4 +316,178 @@ public class TestQueryParser extends Que
assertEquals(unexpanded, smart.parse("\"dogs\""));
}
+ // TODO: fold these into QueryParserTestBase
+
+ /** adds synonym of "dog" for "dogs". */
+ static class MockSynonymAnalyzer extends Analyzer {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ MockTokenizer tokenizer = new MockTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer));
+ }
+ }
+
+ /** simple synonyms test */
+ public void testSynonyms() throws Exception {
+ BooleanQuery expected = new BooleanQuery(true);
+ expected.add(new TermQuery(new Term("field", "dogs")), BooleanClause.Occur.SHOULD);
+ expected.add(new TermQuery(new Term("field", "dog")), BooleanClause.Occur.SHOULD);
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockSynonymAnalyzer());
+ assertEquals(expected, qp.parse("dogs"));
+ assertEquals(expected, qp.parse("\"dogs\""));
+ qp.setDefaultOperator(Operator.AND);
+ assertEquals(expected, qp.parse("dogs"));
+ assertEquals(expected, qp.parse("\"dogs\""));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("dogs^2"));
+ assertEquals(expected, qp.parse("\"dogs\"^2"));
+ }
+
+ /** forms multiphrase query */
+ public void testSynonymsPhrase() throws Exception {
+ MultiPhraseQuery expected = new MultiPhraseQuery();
+ expected.add(new Term("field", "old"));
+ expected.add(new Term[] { new Term("field", "dogs"), new Term("field", "dog") });
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockSynonymAnalyzer());
+ assertEquals(expected, qp.parse("\"old dogs\""));
+ qp.setDefaultOperator(Operator.AND);
+ assertEquals(expected, qp.parse("\"old dogs\""));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("\"old dogs\"^2"));
+ expected.setSlop(3);
+ assertEquals(expected, qp.parse("\"old dogs\"~3^2"));
+ }
+
+ /**
+ * adds synonym of "國" for "国".
+ */
+ protected static class MockCJKSynonymFilter extends TokenFilter {
+ CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+ boolean addSynonym = false;
+
+ public MockCJKSynonymFilter(TokenStream input) {
+ super(input);
+ }
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ if (addSynonym) { // inject our synonym
+ clearAttributes();
+ termAtt.setEmpty().append("å");
+ posIncAtt.setPositionIncrement(0);
+ addSynonym = false;
+ return true;
+ }
+
+ if (input.incrementToken()) {
+ addSynonym = termAtt.toString().equals("å½");
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+
+ static class MockCJKSynonymAnalyzer extends Analyzer {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new SimpleCJKTokenizer(reader);
+ return new TokenStreamComponents(tokenizer, new MockCJKSynonymFilter(tokenizer));
+ }
+ }
+
+ /** simple CJK synonym test */
+ public void testCJKSynonym() throws Exception {
+ BooleanQuery expected = new BooleanQuery(true);
+ expected.add(new TermQuery(new Term("field", "å½")), BooleanClause.Occur.SHOULD);
+ expected.add(new TermQuery(new Term("field", "å")), BooleanClause.Occur.SHOULD);
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
+ assertEquals(expected, qp.parse("å½"));
+ qp.setDefaultOperator(Operator.AND);
+ assertEquals(expected, qp.parse("å½"));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("å½^2"));
+ }
+
+ /** synonyms with default OR operator */
+ public void testCJKSynonymsOR() throws Exception {
+ BooleanQuery expected = new BooleanQuery();
+ expected.add(new TermQuery(new Term("field", "ä¸")), BooleanClause.Occur.SHOULD);
+ BooleanQuery inner = new BooleanQuery(true);
+ inner.add(new TermQuery(new Term("field", "å½")), BooleanClause.Occur.SHOULD);
+ inner.add(new TermQuery(new Term("field", "å")), BooleanClause.Occur.SHOULD);
+ expected.add(inner, BooleanClause.Occur.SHOULD);
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
+ assertEquals(expected, qp.parse("ä¸å½"));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("ä¸å½^2"));
+ }
+
+ /** more complex synonyms with default OR operator */
+ public void testCJKSynonymsOR2() throws Exception {
+ BooleanQuery expected = new BooleanQuery();
+ expected.add(new TermQuery(new Term("field", "ä¸")), BooleanClause.Occur.SHOULD);
+ BooleanQuery inner = new BooleanQuery(true);
+ inner.add(new TermQuery(new Term("field", "å½")), BooleanClause.Occur.SHOULD);
+ inner.add(new TermQuery(new Term("field", "å")), BooleanClause.Occur.SHOULD);
+ expected.add(inner, BooleanClause.Occur.SHOULD);
+ BooleanQuery inner2 = new BooleanQuery(true);
+ inner2.add(new TermQuery(new Term("field", "å½")), BooleanClause.Occur.SHOULD);
+ inner2.add(new TermQuery(new Term("field", "å")), BooleanClause.Occur.SHOULD);
+ expected.add(inner2, BooleanClause.Occur.SHOULD);
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
+ assertEquals(expected, qp.parse("ä¸å½å½"));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("ä¸å½å½^2"));
+ }
+
+ /** synonyms with default AND operator */
+ public void testCJKSynonymsAND() throws Exception {
+ BooleanQuery expected = new BooleanQuery();
+ expected.add(new TermQuery(new Term("field", "ä¸")), BooleanClause.Occur.MUST);
+ BooleanQuery inner = new BooleanQuery(true);
+ inner.add(new TermQuery(new Term("field", "å½")), BooleanClause.Occur.SHOULD);
+ inner.add(new TermQuery(new Term("field", "å")), BooleanClause.Occur.SHOULD);
+ expected.add(inner, BooleanClause.Occur.MUST);
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
+ qp.setDefaultOperator(Operator.AND);
+ assertEquals(expected, qp.parse("ä¸å½"));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("ä¸å½^2"));
+ }
+
+ /** more complex synonyms with default AND operator */
+ public void testCJKSynonymsAND2() throws Exception {
+ BooleanQuery expected = new BooleanQuery();
+ expected.add(new TermQuery(new Term("field", "ä¸")), BooleanClause.Occur.MUST);
+ BooleanQuery inner = new BooleanQuery(true);
+ inner.add(new TermQuery(new Term("field", "å½")), BooleanClause.Occur.SHOULD);
+ inner.add(new TermQuery(new Term("field", "å")), BooleanClause.Occur.SHOULD);
+ expected.add(inner, BooleanClause.Occur.MUST);
+ BooleanQuery inner2 = new BooleanQuery(true);
+ inner2.add(new TermQuery(new Term("field", "å½")), BooleanClause.Occur.SHOULD);
+ inner2.add(new TermQuery(new Term("field", "å")), BooleanClause.Occur.SHOULD);
+ expected.add(inner2, BooleanClause.Occur.MUST);
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
+ qp.setDefaultOperator(Operator.AND);
+ assertEquals(expected, qp.parse("ä¸å½å½"));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("ä¸å½å½^2"));
+ }
+
+ /** forms multiphrase query */
+ public void testCJKSynonymsPhrase() throws Exception {
+ MultiPhraseQuery expected = new MultiPhraseQuery();
+ expected.add(new Term("field", "ä¸"));
+ expected.add(new Term[] { new Term("field", "å½"), new Term("field", "å")});
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockCJKSynonymAnalyzer());
+ qp.setDefaultOperator(Operator.AND);
+ assertEquals(expected, qp.parse("\"ä¸å½\""));
+ expected.setBoost(2.0f);
+ assertEquals(expected, qp.parse("\"ä¸å½\"^2"));
+ expected.setSlop(3);
+ assertEquals(expected, qp.parse("\"ä¸å½\"~3^2"));
+ }
+
}
Modified: lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java?rev=1483364&r1=1483363&r2=1483364&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java (original)
+++ lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java Thu May 16 13:47:17 2013
@@ -236,7 +236,7 @@ public abstract class QueryParserTestBas
}
//individual CJK chars as terms, like StandardAnalyzer
- private class SimpleCJKTokenizer extends Tokenizer {
+ protected static class SimpleCJKTokenizer extends Tokenizer {
private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
public SimpleCJKTokenizer(Reader input) {
@@ -244,7 +244,7 @@ public abstract class QueryParserTestBas
}
@Override
- public boolean incrementToken() throws IOException {
+ public final boolean incrementToken() throws IOException {
int ch = input.read();
if (ch < 0)
return false;
@@ -1088,7 +1088,7 @@ public abstract class QueryParserTestBas
/**
* adds synonym of "dog" for "dogs".
*/
- private class MockSynonymFilter extends TokenFilter {
+ protected static class MockSynonymFilter extends TokenFilter {
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
boolean addSynonym = false;