You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by alessandrobenedetti <gi...@git.apache.org> on 2018/05/31 15:40:49 UTC
[GitHub] lucene-solr pull request #389: [LUCENE-6687] not necessary nested for loop r...
GitHub user alessandrobenedetti opened a pull request:
https://github.com/apache/lucene-solr/pull/389
[LUCENE-6687] not necessary nested for loop removed for terms retrieval in More Like This
Fixes a bug in the term frequency calculation for More Like This (MLT).
You can merge this pull request into a Git repository by running:
$ git pull https://github.com/SeaseLtd/lucene-solr LUCENE-6687
Alternatively you can review and apply these changes as the patch at:
https://github.com/apache/lucene-solr/pull/389.patch
To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:
This closes #389
----
commit 4cc6731adfd1d697d528c0963765fa27a5ca0e6a
Author: Alessandro Benedetti <a....@...>
Date: 2018-05-31T15:39:15Z
[LUCENE-6687] not necessary nested for loop removed for terms retrieval in More Like This
----
---
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org
[GitHub] lucene-solr pull request #389: [LUCENE-6687] not necessary nested for loop r...
Posted by alessandrobenedetti <gi...@git.apache.org>.
Github user alessandrobenedetti commented on a diff in the pull request:
https://github.com/apache/lucene-solr/pull/389#discussion_r223770725
--- Diff: lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java ---
@@ -186,6 +200,117 @@ public void testMultiValues() throws Exception {
analyzer.close();
}
+ public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryAccordingToCorrectTermFrequencies() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ mlt.setAnalyzer(analyzer);
+ mlt.setMinDocFreq(0);
+ mlt.setMinTermFreq(3);
+ mlt.setMinWordLen(1);
+ String sampleField1 = "text";
+ String sampleField2 = "text2";
+ mlt.setFieldNames(new String[]{sampleField1, sampleField2});
+
+ Map<String, Collection<Object>> filteredDocument = new HashMap<>();
+ String textValue = "apache apache lucene lucene lucene";
+ filteredDocument.put(sampleField1, Arrays.asList(textValue));
+ filteredDocument.put(sampleField2, Arrays.asList(textValue));
+
+ BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
+ Collection<BooleanClause> clauses = query.clauses();
+ assertEquals("Expected 1 clauses only!", 1, clauses.size());
+ for (BooleanClause clause : clauses) {
+ Term term = ((TermQuery) clause.getQuery()).getTerm();
+ assertThat(term, is(new Term(sampleField1, "lucene")));
+ }
+ analyzer.close();
+ }
+
+ public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryWithCorrectTerms() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ mlt.setAnalyzer(analyzer);
+ mlt.setMinDocFreq(0);
+ mlt.setMinTermFreq(3);
+ mlt.setMinWordLen(1);
+ String sampleField1 = "text";
+ String sampleField2 = "text2";
+ mlt.setFieldNames(new String[]{sampleField1});
+
+ Map<String, Collection<Object>> filteredDocument = new HashMap<>();
+ String textValue1 = "apache apache lucene lucene";
+ String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
+ filteredDocument.put(sampleField1, Arrays.asList(textValue1));
+ filteredDocument.put(sampleField2, Arrays.asList(textValue2));
+
+ BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
+ Collection<BooleanClause> clauses = query.clauses();
+
+ HashSet<Term> unexpectedTerms = new HashSet<>();
+ unexpectedTerms.add(new Term("text", "apache"));//Term Frequency < Minimum Accepted Term Frequency
+ unexpectedTerms.add(new Term("text", "lucene"));//Term Frequency < Minimum Accepted Term Frequency
+ unexpectedTerms.add(new Term("text", "apache2"));//Term Frequency < Minimum Accepted Term Frequency
+ unexpectedTerms.add(new Term("text", "lucene2"));//Wrong Field
+
+ //None of the Not Expected terms is in the query
+ for (BooleanClause clause : clauses) {
+ Term term = ((TermQuery) clause.getQuery()).getTerm();
+ assertFalse("Unexpected term '" + term + "' found in query terms", unexpectedTerms.contains(term));
+ }
+
+ assertEquals("Expected 0 clauses only!", 0, clauses.size());
+
+ analyzer.close();
+ }
+
+ public void testLiveMapDocument_queryFieldsSet_shouldBuildQueryFromSpecifiedFieldnamesOnly() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ mlt.setAnalyzer(analyzer);
+ mlt.setMinDocFreq(1);
--- End diff --
Set the minimum document frequency to 0 here, i.e. `mlt.setMinDocFreq(0)`.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org
[GitHub] lucene-solr pull request #389: [LUCENE-6687] not necessary nested for loop r...
Posted by alessandrobenedetti <gi...@git.apache.org>.
Github user alessandrobenedetti commented on a diff in the pull request:
https://github.com/apache/lucene-solr/pull/389#discussion_r223774170
--- Diff: lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java ---
@@ -186,6 +200,117 @@ public void testMultiValues() throws Exception {
analyzer.close();
}
+ public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryAccordingToCorrectTermFrequencies() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ mlt.setAnalyzer(analyzer);
+ mlt.setMinDocFreq(0);
+ mlt.setMinTermFreq(3);
+ mlt.setMinWordLen(1);
+ String sampleField1 = "text";
+ String sampleField2 = "text2";
+ mlt.setFieldNames(new String[]{sampleField1, sampleField2});
+
+ Map<String, Collection<Object>> filteredDocument = new HashMap<>();
+ String textValue = "apache apache lucene lucene lucene";
+ filteredDocument.put(sampleField1, Arrays.asList(textValue));
+ filteredDocument.put(sampleField2, Arrays.asList(textValue));
+
+ BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
+ Collection<BooleanClause> clauses = query.clauses();
+ assertEquals("Expected 1 clauses only!", 1, clauses.size());
+ for (BooleanClause clause : clauses) {
+ Term term = ((TermQuery) clause.getQuery()).getTerm();
+ assertThat(term, is(new Term(sampleField1, "lucene")));
+ }
+ analyzer.close();
+ }
+
+ public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryWithCorrectTerms() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ mlt.setAnalyzer(analyzer);
+ mlt.setMinDocFreq(0);
+ mlt.setMinTermFreq(3);
+ mlt.setMinWordLen(1);
+ String sampleField1 = "text";
+ String sampleField2 = "text2";
+ mlt.setFieldNames(new String[]{sampleField1});
+
+ Map<String, Collection<Object>> filteredDocument = new HashMap<>();
+ String textValue1 = "apache apache lucene lucene";
+ String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
+ filteredDocument.put(sampleField1, Arrays.asList(textValue1));
+ filteredDocument.put(sampleField2, Arrays.asList(textValue2));
+
+ BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
+ Collection<BooleanClause> clauses = query.clauses();
+
+ HashSet<Term> unexpectedTerms = new HashSet<>();
+ unexpectedTerms.add(new Term("text", "apache"));//Term Frequency < Minimum Accepted Term Frequency
+ unexpectedTerms.add(new Term("text", "lucene"));//Term Frequency < Minimum Accepted Term Frequency
+ unexpectedTerms.add(new Term("text", "apache2"));//Term Frequency < Minimum Accepted Term Frequency
+ unexpectedTerms.add(new Term("text", "lucene2"));//Wrong Field
+
+ //None of the Not Expected terms is in the query
+ for (BooleanClause clause : clauses) {
+ Term term = ((TermQuery) clause.getQuery()).getTerm();
+ assertFalse("Unexpected term '" + term + "' found in query terms", unexpectedTerms.contains(term));
+ }
+
+ assertEquals("Expected 0 clauses only!", 0, clauses.size());
+
+ analyzer.close();
+ }
+
+ public void testLiveMapDocument_queryFieldsSet_shouldBuildQueryFromSpecifiedFieldnamesOnly() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ mlt.setAnalyzer(analyzer);
+ mlt.setMinDocFreq(1);
+ mlt.setMinTermFreq(2);
+ mlt.setMinWordLen(1);
+ String sampleField1 = "text";
+ String sampleField2 = "text2";
+ mlt.setFieldNames(new String[]{sampleField1});
+
+ Map<String, Collection<Object>> filteredDocument = new HashMap<>();
+ String textValue1 = "apache apache lucene lucene";
+ String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
+ filteredDocument.put(sampleField1, Arrays.asList(textValue1));
+ filteredDocument.put(sampleField2, Arrays.asList(textValue2));
+
+ BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
+ Collection<BooleanClause> clauses = query.clauses();
+ HashSet<Term> clausesTerms = new HashSet<>();
+ for (BooleanClause clause : clauses) {
+ Term term = ((TermQuery) clause.getQuery()).getTerm();
+ clausesTerms.add(term);
+ }
+ assertEquals("Expected 2 clauses only!", 2, clauses.size());
+
+ HashSet<Term> expectedTerms = new HashSet<>();
+ expectedTerms.add(new Term("text", "apache"));
+ expectedTerms.add(new Term("text", "lucene"));
+
+ HashSet<Term> unexpectedTerms = new HashSet<>();
+ unexpectedTerms.add(new Term("text", "apache2"));
+ unexpectedTerms.add(new Term("text", "lucene2"));
+
+ //None of the Not Expected terms is in the query
+ for (BooleanClause clause : clauses) {
+ Term term = ((TermQuery) clause.getQuery()).getTerm();
+ assertFalse("Unexpected term '" + term + "' found in query terms", unexpectedTerms.contains(term));
--- End diff --
Move the accumulation of the clause terms (`clausesTerms.add(term)`) into this loop.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org
[GitHub] lucene-solr pull request #389: [LUCENE-6687] not necessary nested for loop r...
Posted by alessandrobenedetti <gi...@git.apache.org>.
Github user alessandrobenedetti commented on a diff in the pull request:
https://github.com/apache/lucene-solr/pull/389#discussion_r223773915
--- Diff: lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java ---
@@ -186,6 +200,117 @@ public void testMultiValues() throws Exception {
analyzer.close();
}
+ public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryAccordingToCorrectTermFrequencies() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ mlt.setAnalyzer(analyzer);
+ mlt.setMinDocFreq(0);
+ mlt.setMinTermFreq(3);
+ mlt.setMinWordLen(1);
+ String sampleField1 = "text";
+ String sampleField2 = "text2";
+ mlt.setFieldNames(new String[]{sampleField1, sampleField2});
+
+ Map<String, Collection<Object>> filteredDocument = new HashMap<>();
+ String textValue = "apache apache lucene lucene lucene";
+ filteredDocument.put(sampleField1, Arrays.asList(textValue));
+ filteredDocument.put(sampleField2, Arrays.asList(textValue));
+
+ BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
+ Collection<BooleanClause> clauses = query.clauses();
+ assertEquals("Expected 1 clauses only!", 1, clauses.size());
+ for (BooleanClause clause : clauses) {
+ Term term = ((TermQuery) clause.getQuery()).getTerm();
+ assertThat(term, is(new Term(sampleField1, "lucene")));
+ }
+ analyzer.close();
+ }
+
+ public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryWithCorrectTerms() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ mlt.setAnalyzer(analyzer);
+ mlt.setMinDocFreq(0);
+ mlt.setMinTermFreq(3);
+ mlt.setMinWordLen(1);
+ String sampleField1 = "text";
+ String sampleField2 = "text2";
+ mlt.setFieldNames(new String[]{sampleField1});
+
+ Map<String, Collection<Object>> filteredDocument = new HashMap<>();
+ String textValue1 = "apache apache lucene lucene";
+ String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
+ filteredDocument.put(sampleField1, Arrays.asList(textValue1));
+ filteredDocument.put(sampleField2, Arrays.asList(textValue2));
+
+ BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
+ Collection<BooleanClause> clauses = query.clauses();
+
+ HashSet<Term> unexpectedTerms = new HashSet<>();
+ unexpectedTerms.add(new Term("text", "apache"));//Term Frequency < Minimum Accepted Term Frequency
+ unexpectedTerms.add(new Term("text", "lucene"));//Term Frequency < Minimum Accepted Term Frequency
+ unexpectedTerms.add(new Term("text", "apache2"));//Term Frequency < Minimum Accepted Term Frequency
+ unexpectedTerms.add(new Term("text", "lucene2"));//Wrong Field
+
+ //None of the Not Expected terms is in the query
+ for (BooleanClause clause : clauses) {
+ Term term = ((TermQuery) clause.getQuery()).getTerm();
+ assertFalse("Unexpected term '" + term + "' found in query terms", unexpectedTerms.contains(term));
+ }
+
+ assertEquals("Expected 0 clauses only!", 0, clauses.size());
+
+ analyzer.close();
+ }
+
+ public void testLiveMapDocument_queryFieldsSet_shouldBuildQueryFromSpecifiedFieldnamesOnly() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ mlt.setAnalyzer(analyzer);
+ mlt.setMinDocFreq(1);
+ mlt.setMinTermFreq(2);
+ mlt.setMinWordLen(1);
+ String sampleField1 = "text";
+ String sampleField2 = "text2";
+ mlt.setFieldNames(new String[]{sampleField1});
+
+ Map<String, Collection<Object>> filteredDocument = new HashMap<>();
+ String textValue1 = "apache apache lucene lucene";
+ String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
+ filteredDocument.put(sampleField1, Arrays.asList(textValue1));
+ filteredDocument.put(sampleField2, Arrays.asList(textValue2));
+
+ BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
+ Collection<BooleanClause> clauses = query.clauses();
+ HashSet<Term> clausesTerms = new HashSet<>();
+ for (BooleanClause clause : clauses) {
+ Term term = ((TermQuery) clause.getQuery()).getTerm();
+ clausesTerms.add(term);
+ }
+ assertEquals("Expected 2 clauses only!", 2, clauses.size());
+
+ HashSet<Term> expectedTerms = new HashSet<>();
--- End diff --
Move the `expectedTerms` declaration to the beginning of the test.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org
[GitHub] lucene-solr pull request #389: [LUCENE-6687] not necessary nested for loop r...
Posted by alessandrobenedetti <gi...@git.apache.org>.
Github user alessandrobenedetti commented on a diff in the pull request:
https://github.com/apache/lucene-solr/pull/389#discussion_r223764522
--- Diff: lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java ---
@@ -186,6 +200,117 @@ public void testMultiValues() throws Exception {
analyzer.close();
}
+ public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryAccordingToCorrectTermFrequencies() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ mlt.setAnalyzer(analyzer);
+ mlt.setMinDocFreq(0);
+ mlt.setMinTermFreq(3);
+ mlt.setMinWordLen(1);
+ String sampleField1 = "text";
--- End diff --
Move this field name to a class constant.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org
[GitHub] lucene-solr pull request #389: [LUCENE-6687] not necessary nested for loop r...
Posted by alessandrobenedetti <gi...@git.apache.org>.
Github user alessandrobenedetti commented on a diff in the pull request:
https://github.com/apache/lucene-solr/pull/389#discussion_r223769747
--- Diff: lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java ---
@@ -186,6 +200,117 @@ public void testMultiValues() throws Exception {
analyzer.close();
}
+ public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryAccordingToCorrectTermFrequencies() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ mlt.setAnalyzer(analyzer);
+ mlt.setMinDocFreq(0);
+ mlt.setMinTermFreq(3);
+ mlt.setMinWordLen(1);
+ String sampleField1 = "text";
+ String sampleField2 = "text2";
+ mlt.setFieldNames(new String[]{sampleField1, sampleField2});
+
+ Map<String, Collection<Object>> filteredDocument = new HashMap<>();
+ String textValue = "apache apache lucene lucene lucene";
+ filteredDocument.put(sampleField1, Arrays.asList(textValue));
+ filteredDocument.put(sampleField2, Arrays.asList(textValue));
+
+ BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
+ Collection<BooleanClause> clauses = query.clauses();
+ assertEquals("Expected 1 clauses only!", 1, clauses.size());
+ for (BooleanClause clause : clauses) {
+ Term term = ((TermQuery) clause.getQuery()).getTerm();
+ assertThat(term, is(new Term(sampleField1, "lucene")));
+ }
+ analyzer.close();
+ }
+
+ public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryWithCorrectTerms() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ mlt.setAnalyzer(analyzer);
+ mlt.setMinDocFreq(0);
+ mlt.setMinTermFreq(3);
+ mlt.setMinWordLen(1);
+ String sampleField1 = "text";
+ String sampleField2 = "text2";
+ mlt.setFieldNames(new String[]{sampleField1});
+
+ Map<String, Collection<Object>> filteredDocument = new HashMap<>();
+ String textValue1 = "apache apache lucene lucene";
+ String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
+ filteredDocument.put(sampleField1, Arrays.asList(textValue1));
+ filteredDocument.put(sampleField2, Arrays.asList(textValue2));
+
+ BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
+ Collection<BooleanClause> clauses = query.clauses();
+
+ HashSet<Term> unexpectedTerms = new HashSet<>();
+ unexpectedTerms.add(new Term("text", "apache"));//Term Frequency < Minimum Accepted Term Frequency
--- End diff --
Reference the `sampleField1` constant here instead of the hard-coded "text" literal.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org
[GitHub] lucene-solr pull request #389: [LUCENE-6687] not necessary nested for loop r...
Posted by alessandrobenedetti <gi...@git.apache.org>.
Github user alessandrobenedetti commented on a diff in the pull request:
https://github.com/apache/lucene-solr/pull/389#discussion_r223775673
--- Diff: lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java ---
@@ -186,6 +200,117 @@ public void testMultiValues() throws Exception {
analyzer.close();
}
+ public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryAccordingToCorrectTermFrequencies() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ mlt.setAnalyzer(analyzer);
+ mlt.setMinDocFreq(0);
+ mlt.setMinTermFreq(3);
+ mlt.setMinWordLen(1);
+ String sampleField1 = "text";
+ String sampleField2 = "text2";
+ mlt.setFieldNames(new String[]{sampleField1, sampleField2});
+
+ Map<String, Collection<Object>> filteredDocument = new HashMap<>();
+ String textValue = "apache apache lucene lucene lucene";
+ filteredDocument.put(sampleField1, Arrays.asList(textValue));
+ filteredDocument.put(sampleField2, Arrays.asList(textValue));
+
+ BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
+ Collection<BooleanClause> clauses = query.clauses();
+ assertEquals("Expected 1 clauses only!", 1, clauses.size());
+ for (BooleanClause clause : clauses) {
+ Term term = ((TermQuery) clause.getQuery()).getTerm();
+ assertThat(term, is(new Term(sampleField1, "lucene")));
+ }
+ analyzer.close();
+ }
+
+ public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryWithCorrectTerms() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ mlt.setAnalyzer(analyzer);
+ mlt.setMinDocFreq(0);
+ mlt.setMinTermFreq(3);
+ mlt.setMinWordLen(1);
+ String sampleField1 = "text";
+ String sampleField2 = "text2";
+ mlt.setFieldNames(new String[]{sampleField1});
+
+ Map<String, Collection<Object>> filteredDocument = new HashMap<>();
+ String textValue1 = "apache apache lucene lucene";
+ String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
+ filteredDocument.put(sampleField1, Arrays.asList(textValue1));
+ filteredDocument.put(sampleField2, Arrays.asList(textValue2));
+
+ BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
+ Collection<BooleanClause> clauses = query.clauses();
+
+ HashSet<Term> unexpectedTerms = new HashSet<>();
+ unexpectedTerms.add(new Term("text", "apache"));//Term Frequency < Minimum Accepted Term Frequency
+ unexpectedTerms.add(new Term("text", "lucene"));//Term Frequency < Minimum Accepted Term Frequency
+ unexpectedTerms.add(new Term("text", "apache2"));//Term Frequency < Minimum Accepted Term Frequency
+ unexpectedTerms.add(new Term("text", "lucene2"));//Wrong Field
+
+ //None of the Not Expected terms is in the query
+ for (BooleanClause clause : clauses) {
+ Term term = ((TermQuery) clause.getQuery()).getTerm();
+ assertFalse("Unexpected term '" + term + "' found in query terms", unexpectedTerms.contains(term));
+ }
+
+ assertEquals("Expected 0 clauses only!", 0, clauses.size());
+
+ analyzer.close();
+ }
+
+ public void testLiveMapDocument_queryFieldsSet_shouldBuildQueryFromSpecifiedFieldnamesOnly() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
+ mlt.setAnalyzer(analyzer);
+ mlt.setMinDocFreq(1);
+ mlt.setMinTermFreq(2);
+ mlt.setMinWordLen(1);
+ String sampleField1 = "text";
+ String sampleField2 = "text2";
+ mlt.setFieldNames(new String[]{sampleField1});
+
+ Map<String, Collection<Object>> filteredDocument = new HashMap<>();
+ String textValue1 = "apache apache lucene lucene";
+ String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
+ filteredDocument.put(sampleField1, Arrays.asList(textValue1));
+ filteredDocument.put(sampleField2, Arrays.asList(textValue2));
+
+ BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
+ Collection<BooleanClause> clauses = query.clauses();
+ HashSet<Term> clausesTerms = new HashSet<>();
+ for (BooleanClause clause : clauses) {
+ Term term = ((TermQuery) clause.getQuery()).getTerm();
+ clausesTerms.add(term);
+ }
+ assertEquals("Expected 2 clauses only!", 2, clauses.size());
+
+ HashSet<Term> expectedTerms = new HashSet<>();
+ expectedTerms.add(new Term("text", "apache"));
+ expectedTerms.add(new Term("text", "lucene"));
+
+ HashSet<Term> unexpectedTerms = new HashSet<>();
+ unexpectedTerms.add(new Term("text", "apache2"));
+ unexpectedTerms.add(new Term("text", "lucene2"));
+
+ //None of the Not Expected terms is in the query
+ for (BooleanClause clause : clauses) {
+ Term term = ((TermQuery) clause.getQuery()).getTerm();
+ assertFalse("Unexpected term '" + term + "' found in query terms", unexpectedTerms.contains(term));
+ }
+
+ //All of the Expected terms are in the query
+ for (Term expectedTerm : expectedTerms) {
+ assertTrue("Expected term '" + expectedTerm + "' is not found in query terms", clausesTerms.contains(expectedTerm));
+ }
+
--- End diff --
Add a check here: all of the terms in the query must be expected.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org