You are viewing a plain text version of this content. The canonical link for it is here.

Posted to dev@lucene.apache.org by alessandrobenedetti <gi...@git.apache.org> on 2018/05/31 15:40:49 UTC

[GitHub] lucene-solr pull request #389: [LUCENE-6687] not necessary nested for loop r...

GitHub user alessandrobenedetti opened a pull request:

    https://github.com/apache/lucene-solr/pull/389

    [LUCENE-6687] not necessary nested for loop removed for terms retriev…

    Bug in term frequencies calculation for the MLT

You can merge this pull request into a Git repository by running:

    $ git pull https://github.com/SeaseLtd/lucene-solr LUCENE-6687

Alternatively you can review and apply these changes as the patch at:

    https://github.com/apache/lucene-solr/pull/389.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

    This closes #389
    
----
commit 4cc6731adfd1d697d528c0963765fa27a5ca0e6a
Author: Alessandro Benedetti <a....@...>
Date:   2018-05-31T15:39:15Z

    [LUCENE-6687] not necessary nested for loop removed for terms retrieval in More Like This

----


---

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org

[GitHub] lucene-solr pull request #389: [LUCENE-6687] not necessary nested for loop r...

Posted by alessandrobenedetti <gi...@git.apache.org>.

Github user alessandrobenedetti commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/389#discussion_r223770725
  
    --- Diff: lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java ---
    @@ -186,6 +200,117 @@ public void testMultiValues() throws Exception {
         analyzer.close();
       }
     
    +  public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryAccordingToCorrectTermFrequencies() throws Exception {
    +    MoreLikeThis mlt = new MoreLikeThis(reader);
    +    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    +    mlt.setAnalyzer(analyzer);
    +    mlt.setMinDocFreq(0);
    +    mlt.setMinTermFreq(3);
    +    mlt.setMinWordLen(1);
    +    String sampleField1 = "text";
    +    String sampleField2 = "text2";
    +    mlt.setFieldNames(new String[]{sampleField1, sampleField2});
    +
    +    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    +    String textValue = "apache apache lucene lucene lucene";
    +    filteredDocument.put(sampleField1, Arrays.asList(textValue));
    +    filteredDocument.put(sampleField2, Arrays.asList(textValue));
    +
    +    BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
    +    Collection<BooleanClause> clauses = query.clauses();
    +    assertEquals("Expected 1 clauses only!", 1, clauses.size());
    +    for (BooleanClause clause : clauses) {
    +      Term term = ((TermQuery) clause.getQuery()).getTerm();
    +      assertThat(term, is(new Term(sampleField1, "lucene")));
    +    }
    +    analyzer.close();
    +  }
    +
    +  public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryWithCorrectTerms() throws Exception {
    +    MoreLikeThis mlt = new MoreLikeThis(reader);
    +    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    +    mlt.setAnalyzer(analyzer);
    +    mlt.setMinDocFreq(0);
    +    mlt.setMinTermFreq(3);
    +    mlt.setMinWordLen(1);
    +    String sampleField1 = "text";
    +    String sampleField2 = "text2";
    +    mlt.setFieldNames(new String[]{sampleField1});
    +
    +    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    +    String textValue1 = "apache apache lucene lucene";
    +    String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
    +    filteredDocument.put(sampleField1, Arrays.asList(textValue1));
    +    filteredDocument.put(sampleField2, Arrays.asList(textValue2));
    +
    +    BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
    +    Collection<BooleanClause> clauses = query.clauses();
    +
    +    HashSet<Term> unexpectedTerms = new HashSet<>();
    +    unexpectedTerms.add(new Term("text", "apache"));//Term Frequency < Minimum Accepted Term Frequency
    +    unexpectedTerms.add(new Term("text", "lucene"));//Term Frequency < Minimum Accepted Term Frequency
    +    unexpectedTerms.add(new Term("text", "apache2"));//Term Frequency < Minimum Accepted Term Frequency
    +    unexpectedTerms.add(new Term("text", "lucene2"));//Wrong Field
    +
    +    //None of the Not Expected terms is in the query
    +    for (BooleanClause clause : clauses) {
    +      Term term = ((TermQuery) clause.getQuery()).getTerm();
    +      assertFalse("Unexpected term '" + term + "' found in query terms", unexpectedTerms.contains(term));
    +    }
    +
    +    assertEquals("Expected 0 clauses only!", 0, clauses.size());
    +
    +    analyzer.close();
    +  }
    +
    +  public void testLiveMapDocument_queryFieldsSet_shouldBuildQueryFromSpecifiedFieldnamesOnly() throws Exception {
    +    MoreLikeThis mlt = new MoreLikeThis(reader);
    +    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    +    mlt.setAnalyzer(analyzer);
    +    mlt.setMinDocFreq(1);
    --- End diff --
    
    Set the minimum document frequency to 0 here (`mlt.setMinDocFreq(0)`).


---

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org

[GitHub] lucene-solr pull request #389: [LUCENE-6687] not necessary nested for loop r...

Posted by alessandrobenedetti <gi...@git.apache.org>.

Github user alessandrobenedetti commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/389#discussion_r223774170
  
    --- Diff: lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java ---
    @@ -186,6 +200,117 @@ public void testMultiValues() throws Exception {
         analyzer.close();
       }
     
    +  public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryAccordingToCorrectTermFrequencies() throws Exception {
    +    MoreLikeThis mlt = new MoreLikeThis(reader);
    +    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    +    mlt.setAnalyzer(analyzer);
    +    mlt.setMinDocFreq(0);
    +    mlt.setMinTermFreq(3);
    +    mlt.setMinWordLen(1);
    +    String sampleField1 = "text";
    +    String sampleField2 = "text2";
    +    mlt.setFieldNames(new String[]{sampleField1, sampleField2});
    +
    +    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    +    String textValue = "apache apache lucene lucene lucene";
    +    filteredDocument.put(sampleField1, Arrays.asList(textValue));
    +    filteredDocument.put(sampleField2, Arrays.asList(textValue));
    +
    +    BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
    +    Collection<BooleanClause> clauses = query.clauses();
    +    assertEquals("Expected 1 clauses only!", 1, clauses.size());
    +    for (BooleanClause clause : clauses) {
    +      Term term = ((TermQuery) clause.getQuery()).getTerm();
    +      assertThat(term, is(new Term(sampleField1, "lucene")));
    +    }
    +    analyzer.close();
    +  }
    +
    +  public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryWithCorrectTerms() throws Exception {
    +    MoreLikeThis mlt = new MoreLikeThis(reader);
    +    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    +    mlt.setAnalyzer(analyzer);
    +    mlt.setMinDocFreq(0);
    +    mlt.setMinTermFreq(3);
    +    mlt.setMinWordLen(1);
    +    String sampleField1 = "text";
    +    String sampleField2 = "text2";
    +    mlt.setFieldNames(new String[]{sampleField1});
    +
    +    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    +    String textValue1 = "apache apache lucene lucene";
    +    String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
    +    filteredDocument.put(sampleField1, Arrays.asList(textValue1));
    +    filteredDocument.put(sampleField2, Arrays.asList(textValue2));
    +
    +    BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
    +    Collection<BooleanClause> clauses = query.clauses();
    +
    +    HashSet<Term> unexpectedTerms = new HashSet<>();
    +    unexpectedTerms.add(new Term("text", "apache"));//Term Frequency < Minimum Accepted Term Frequency
    +    unexpectedTerms.add(new Term("text", "lucene"));//Term Frequency < Minimum Accepted Term Frequency
    +    unexpectedTerms.add(new Term("text", "apache2"));//Term Frequency < Minimum Accepted Term Frequency
    +    unexpectedTerms.add(new Term("text", "lucene2"));//Wrong Field
    +
    +    //None of the Not Expected terms is in the query
    +    for (BooleanClause clause : clauses) {
    +      Term term = ((TermQuery) clause.getQuery()).getTerm();
    +      assertFalse("Unexpected term '" + term + "' found in query terms", unexpectedTerms.contains(term));
    +    }
    +
    +    assertEquals("Expected 0 clauses only!", 0, clauses.size());
    +
    +    analyzer.close();
    +  }
    +
    +  public void testLiveMapDocument_queryFieldsSet_shouldBuildQueryFromSpecifiedFieldnamesOnly() throws Exception {
    +    MoreLikeThis mlt = new MoreLikeThis(reader);
    +    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    +    mlt.setAnalyzer(analyzer);
    +    mlt.setMinDocFreq(1);
    +    mlt.setMinTermFreq(2);
    +    mlt.setMinWordLen(1);
    +    String sampleField1 = "text";
    +    String sampleField2 = "text2";
    +    mlt.setFieldNames(new String[]{sampleField1});
    +
    +    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    +    String textValue1 = "apache apache lucene lucene";
    +    String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
    +    filteredDocument.put(sampleField1, Arrays.asList(textValue1));
    +    filteredDocument.put(sampleField2, Arrays.asList(textValue2));
    +
    +    BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
    +    Collection<BooleanClause> clauses = query.clauses();
    +    HashSet<Term> clausesTerms = new HashSet<>();
    +    for (BooleanClause clause : clauses) {
    +      Term term = ((TermQuery) clause.getQuery()).getTerm();
    +      clausesTerms.add(term);
    +    }
    +    assertEquals("Expected 2 clauses only!", 2, clauses.size());
    +
    +    HashSet<Term> expectedTerms = new HashSet<>();
    +    expectedTerms.add(new Term("text", "apache"));
    +    expectedTerms.add(new Term("text", "lucene"));
    +
    +    HashSet<Term> unexpectedTerms = new HashSet<>();
    +    unexpectedTerms.add(new Term("text", "apache2"));
    +    unexpectedTerms.add(new Term("text", "lucene2"));
    +
    +    //None of the Not Expected terms is in the query
    +    for (BooleanClause clause : clauses) {
    +      Term term = ((TermQuery) clause.getQuery()).getTerm();
    +      assertFalse("Unexpected term '" + term + "' found in query terms", unexpectedTerms.contains(term));
    --- End diff --
    
    Move the accumulation of the clause terms (`clausesTerms`) into this loop.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org

[GitHub] lucene-solr pull request #389: [LUCENE-6687] not necessary nested for loop r...

Posted by alessandrobenedetti <gi...@git.apache.org>.

Github user alessandrobenedetti commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/389#discussion_r223773915
  
    --- Diff: lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java ---
    @@ -186,6 +200,117 @@ public void testMultiValues() throws Exception {
         analyzer.close();
       }
     
    +  public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryAccordingToCorrectTermFrequencies() throws Exception {
    +    MoreLikeThis mlt = new MoreLikeThis(reader);
    +    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    +    mlt.setAnalyzer(analyzer);
    +    mlt.setMinDocFreq(0);
    +    mlt.setMinTermFreq(3);
    +    mlt.setMinWordLen(1);
    +    String sampleField1 = "text";
    +    String sampleField2 = "text2";
    +    mlt.setFieldNames(new String[]{sampleField1, sampleField2});
    +
    +    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    +    String textValue = "apache apache lucene lucene lucene";
    +    filteredDocument.put(sampleField1, Arrays.asList(textValue));
    +    filteredDocument.put(sampleField2, Arrays.asList(textValue));
    +
    +    BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
    +    Collection<BooleanClause> clauses = query.clauses();
    +    assertEquals("Expected 1 clauses only!", 1, clauses.size());
    +    for (BooleanClause clause : clauses) {
    +      Term term = ((TermQuery) clause.getQuery()).getTerm();
    +      assertThat(term, is(new Term(sampleField1, "lucene")));
    +    }
    +    analyzer.close();
    +  }
    +
    +  public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryWithCorrectTerms() throws Exception {
    +    MoreLikeThis mlt = new MoreLikeThis(reader);
    +    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    +    mlt.setAnalyzer(analyzer);
    +    mlt.setMinDocFreq(0);
    +    mlt.setMinTermFreq(3);
    +    mlt.setMinWordLen(1);
    +    String sampleField1 = "text";
    +    String sampleField2 = "text2";
    +    mlt.setFieldNames(new String[]{sampleField1});
    +
    +    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    +    String textValue1 = "apache apache lucene lucene";
    +    String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
    +    filteredDocument.put(sampleField1, Arrays.asList(textValue1));
    +    filteredDocument.put(sampleField2, Arrays.asList(textValue2));
    +
    +    BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
    +    Collection<BooleanClause> clauses = query.clauses();
    +
    +    HashSet<Term> unexpectedTerms = new HashSet<>();
    +    unexpectedTerms.add(new Term("text", "apache"));//Term Frequency < Minimum Accepted Term Frequency
    +    unexpectedTerms.add(new Term("text", "lucene"));//Term Frequency < Minimum Accepted Term Frequency
    +    unexpectedTerms.add(new Term("text", "apache2"));//Term Frequency < Minimum Accepted Term Frequency
    +    unexpectedTerms.add(new Term("text", "lucene2"));//Wrong Field
    +
    +    //None of the Not Expected terms is in the query
    +    for (BooleanClause clause : clauses) {
    +      Term term = ((TermQuery) clause.getQuery()).getTerm();
    +      assertFalse("Unexpected term '" + term + "' found in query terms", unexpectedTerms.contains(term));
    +    }
    +
    +    assertEquals("Expected 0 clauses only!", 0, clauses.size());
    +
    +    analyzer.close();
    +  }
    +
    +  public void testLiveMapDocument_queryFieldsSet_shouldBuildQueryFromSpecifiedFieldnamesOnly() throws Exception {
    +    MoreLikeThis mlt = new MoreLikeThis(reader);
    +    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    +    mlt.setAnalyzer(analyzer);
    +    mlt.setMinDocFreq(1);
    +    mlt.setMinTermFreq(2);
    +    mlt.setMinWordLen(1);
    +    String sampleField1 = "text";
    +    String sampleField2 = "text2";
    +    mlt.setFieldNames(new String[]{sampleField1});
    +
    +    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    +    String textValue1 = "apache apache lucene lucene";
    +    String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
    +    filteredDocument.put(sampleField1, Arrays.asList(textValue1));
    +    filteredDocument.put(sampleField2, Arrays.asList(textValue2));
    +
    +    BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
    +    Collection<BooleanClause> clauses = query.clauses();
    +    HashSet<Term> clausesTerms = new HashSet<>();
    +    for (BooleanClause clause : clauses) {
    +      Term term = ((TermQuery) clause.getQuery()).getTerm();
    +      clausesTerms.add(term);
    +    }
    +    assertEquals("Expected 2 clauses only!", 2, clauses.size());
    +
    +    HashSet<Term> expectedTerms = new HashSet<>();
    --- End diff --
    
    Move the `expectedTerms` set-up to the beginning of the test.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org

[GitHub] lucene-solr pull request #389: [LUCENE-6687] not necessary nested for loop r...

Posted by alessandrobenedetti <gi...@git.apache.org>.

Github user alessandrobenedetti commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/389#discussion_r223764522
  
    --- Diff: lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java ---
    @@ -186,6 +200,117 @@ public void testMultiValues() throws Exception {
         analyzer.close();
       }
     
    +  public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryAccordingToCorrectTermFrequencies() throws Exception {
    +    MoreLikeThis mlt = new MoreLikeThis(reader);
    +    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    +    mlt.setAnalyzer(analyzer);
    +    mlt.setMinDocFreq(0);
    +    mlt.setMinTermFreq(3);
    +    mlt.setMinWordLen(1);
    +    String sampleField1 = "text";
    --- End diff --
    
    Move this field name to a class constant.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org

[GitHub] lucene-solr pull request #389: [LUCENE-6687] not necessary nested for loop r...

Posted by alessandrobenedetti <gi...@git.apache.org>.

Github user alessandrobenedetti commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/389#discussion_r223769747
  
    --- Diff: lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java ---
    @@ -186,6 +200,117 @@ public void testMultiValues() throws Exception {
         analyzer.close();
       }
     
    +  public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryAccordingToCorrectTermFrequencies() throws Exception {
    +    MoreLikeThis mlt = new MoreLikeThis(reader);
    +    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    +    mlt.setAnalyzer(analyzer);
    +    mlt.setMinDocFreq(0);
    +    mlt.setMinTermFreq(3);
    +    mlt.setMinWordLen(1);
    +    String sampleField1 = "text";
    +    String sampleField2 = "text2";
    +    mlt.setFieldNames(new String[]{sampleField1, sampleField2});
    +
    +    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    +    String textValue = "apache apache lucene lucene lucene";
    +    filteredDocument.put(sampleField1, Arrays.asList(textValue));
    +    filteredDocument.put(sampleField2, Arrays.asList(textValue));
    +
    +    BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
    +    Collection<BooleanClause> clauses = query.clauses();
    +    assertEquals("Expected 1 clauses only!", 1, clauses.size());
    +    for (BooleanClause clause : clauses) {
    +      Term term = ((TermQuery) clause.getQuery()).getTerm();
    +      assertThat(term, is(new Term(sampleField1, "lucene")));
    +    }
    +    analyzer.close();
    +  }
    +
    +  public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryWithCorrectTerms() throws Exception {
    +    MoreLikeThis mlt = new MoreLikeThis(reader);
    +    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    +    mlt.setAnalyzer(analyzer);
    +    mlt.setMinDocFreq(0);
    +    mlt.setMinTermFreq(3);
    +    mlt.setMinWordLen(1);
    +    String sampleField1 = "text";
    +    String sampleField2 = "text2";
    +    mlt.setFieldNames(new String[]{sampleField1});
    +
    +    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    +    String textValue1 = "apache apache lucene lucene";
    +    String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
    +    filteredDocument.put(sampleField1, Arrays.asList(textValue1));
    +    filteredDocument.put(sampleField2, Arrays.asList(textValue2));
    +
    +    BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
    +    Collection<BooleanClause> clauses = query.clauses();
    +
    +    HashSet<Term> unexpectedTerms = new HashSet<>();
    +    unexpectedTerms.add(new Term("text", "apache"));//Term Frequency < Minimum Accepted Term Frequency
    --- End diff --
    
    Reference the `sampleField1` constant here instead of the "text" literal.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org

[GitHub] lucene-solr pull request #389: [LUCENE-6687] not necessary nested for loop r...

Posted by alessandrobenedetti <gi...@git.apache.org>.

Github user alessandrobenedetti commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/389#discussion_r223775673
  
    --- Diff: lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java ---
    @@ -186,6 +200,117 @@ public void testMultiValues() throws Exception {
         analyzer.close();
       }
     
    +  public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryAccordingToCorrectTermFrequencies() throws Exception {
    +    MoreLikeThis mlt = new MoreLikeThis(reader);
    +    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    +    mlt.setAnalyzer(analyzer);
    +    mlt.setMinDocFreq(0);
    +    mlt.setMinTermFreq(3);
    +    mlt.setMinWordLen(1);
    +    String sampleField1 = "text";
    +    String sampleField2 = "text2";
    +    mlt.setFieldNames(new String[]{sampleField1, sampleField2});
    +
    +    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    +    String textValue = "apache apache lucene lucene lucene";
    +    filteredDocument.put(sampleField1, Arrays.asList(textValue));
    +    filteredDocument.put(sampleField2, Arrays.asList(textValue));
    +
    +    BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
    +    Collection<BooleanClause> clauses = query.clauses();
    +    assertEquals("Expected 1 clauses only!", 1, clauses.size());
    +    for (BooleanClause clause : clauses) {
    +      Term term = ((TermQuery) clause.getQuery()).getTerm();
    +      assertThat(term, is(new Term(sampleField1, "lucene")));
    +    }
    +    analyzer.close();
    +  }
    +
    +  public void testLiveMapDocument_minTermFrequencySet_shouldBuildQueryWithCorrectTerms() throws Exception {
    +    MoreLikeThis mlt = new MoreLikeThis(reader);
    +    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    +    mlt.setAnalyzer(analyzer);
    +    mlt.setMinDocFreq(0);
    +    mlt.setMinTermFreq(3);
    +    mlt.setMinWordLen(1);
    +    String sampleField1 = "text";
    +    String sampleField2 = "text2";
    +    mlt.setFieldNames(new String[]{sampleField1});
    +
    +    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    +    String textValue1 = "apache apache lucene lucene";
    +    String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
    +    filteredDocument.put(sampleField1, Arrays.asList(textValue1));
    +    filteredDocument.put(sampleField2, Arrays.asList(textValue2));
    +
    +    BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
    +    Collection<BooleanClause> clauses = query.clauses();
    +
    +    HashSet<Term> unexpectedTerms = new HashSet<>();
    +    unexpectedTerms.add(new Term("text", "apache"));//Term Frequency < Minimum Accepted Term Frequency
    +    unexpectedTerms.add(new Term("text", "lucene"));//Term Frequency < Minimum Accepted Term Frequency
    +    unexpectedTerms.add(new Term("text", "apache2"));//Term Frequency < Minimum Accepted Term Frequency
    +    unexpectedTerms.add(new Term("text", "lucene2"));//Wrong Field
    +
    +    //None of the Not Expected terms is in the query
    +    for (BooleanClause clause : clauses) {
    +      Term term = ((TermQuery) clause.getQuery()).getTerm();
    +      assertFalse("Unexpected term '" + term + "' found in query terms", unexpectedTerms.contains(term));
    +    }
    +
    +    assertEquals("Expected 0 clauses only!", 0, clauses.size());
    +
    +    analyzer.close();
    +  }
    +
    +  public void testLiveMapDocument_queryFieldsSet_shouldBuildQueryFromSpecifiedFieldnamesOnly() throws Exception {
    +    MoreLikeThis mlt = new MoreLikeThis(reader);
    +    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    +    mlt.setAnalyzer(analyzer);
    +    mlt.setMinDocFreq(1);
    +    mlt.setMinTermFreq(2);
    +    mlt.setMinWordLen(1);
    +    String sampleField1 = "text";
    +    String sampleField2 = "text2";
    +    mlt.setFieldNames(new String[]{sampleField1});
    +
    +    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    +    String textValue1 = "apache apache lucene lucene";
    +    String textValue2 = "apache2 apache2 lucene2 lucene2 lucene2";
    +    filteredDocument.put(sampleField1, Arrays.asList(textValue1));
    +    filteredDocument.put(sampleField2, Arrays.asList(textValue2));
    +
    +    BooleanQuery query = (BooleanQuery) mlt.like(filteredDocument);
    +    Collection<BooleanClause> clauses = query.clauses();
    +    HashSet<Term> clausesTerms = new HashSet<>();
    +    for (BooleanClause clause : clauses) {
    +      Term term = ((TermQuery) clause.getQuery()).getTerm();
    +      clausesTerms.add(term);
    +    }
    +    assertEquals("Expected 2 clauses only!", 2, clauses.size());
    +
    +    HashSet<Term> expectedTerms = new HashSet<>();
    +    expectedTerms.add(new Term("text", "apache"));
    +    expectedTerms.add(new Term("text", "lucene"));
    +
    +    HashSet<Term> unexpectedTerms = new HashSet<>();
    +    unexpectedTerms.add(new Term("text", "apache2"));
    +    unexpectedTerms.add(new Term("text", "lucene2"));
    +
    +    //None of the Not Expected terms is in the query
    +    for (BooleanClause clause : clauses) {
    +      Term term = ((TermQuery) clause.getQuery()).getTerm();
    +      assertFalse("Unexpected term '" + term + "' found in query terms", unexpectedTerms.contains(term));
    +    }
    +
    +    //All of the Expected terms are in the query
    +    for (Term expectedTerm : expectedTerms) {
    +      assertTrue("Expected term '" + expectedTerm + "' is not found in query terms", clausesTerms.contains(expectedTerm));
    +    }
    +
    --- End diff --
    
    Add a check here: all of the terms in the query must be expected terms.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org