You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by eh...@apache.org on 2005/06/03 02:22:47 UTC

svn commit: r179679 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/search/spans/SpanTermQuery.java src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java

Author: ehatcher
Date: Thu Jun  2 17:22:47 2005
New Revision: 179679

URL: http://svn.apache.org/viewcvs?rev=179679&view=rev
Log:
#35157 - Fix for SpanTermQuerys in a BooleanQuery.  Patch from Reece Wilton.

Added:
    lucene/java/trunk/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java
    lucene/java/trunk/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java
Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/search/spans/SpanTermQuery.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/CHANGES.txt?rev=179679&r1=179678&r2=179679&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Thu Jun  2 17:22:47 2005
@@ -168,6 +168,10 @@
     corrupted when the old version of a file was longer than the new.
     Now any existing file is first removed.  (Doug Cutting)
 
+ 9. Fix BooleanQuery containing nested SpanTermQuery's, which previously
+    could return an incorrect number of hits.
+    (Reece Wilton via Erik Hatcher, Bug #35157)
+
 Optimizations
      
  1. Disk usage (peak requirements during indexing and optimization)

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/spans/SpanTermQuery.java?rev=179679&r1=179678&r2=179679&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/spans/SpanTermQuery.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/spans/SpanTermQuery.java Thu Jun  2 17:22:47 2005
@@ -89,6 +89,11 @@
         }
 
         public boolean skipTo(int target) throws IOException {
+          // are we already at the correct position?
+          if (doc >= target) {
+            return true;
+          }
+
           if (!positions.skipTo(target)) {
             doc = Integer.MAX_VALUE;
             return false;

Added: lucene/java/trunk/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java?rev=179679&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java Thu Jun  2 17:22:47 2005
@@ -0,0 +1,164 @@
+package org.apache.lucene.search.spans;
+
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+
+/*******************************************************************************
+ * Regression test for Lucene bug #35157: SpanTermQuery instances did not work
+ * correctly when nested in a BooleanQuery.
+ *
+ * @author Reece Wilton
+ */
+public class TestSpansAdvanced extends TestCase {
+
+    // location to the index
+    protected Directory mDirectory;;
+
+    // field names in the index
+    private final static String FIELD_ID = "ID";
+    protected final static String FIELD_TEXT = "TEXT";
+
+    /**
+     * Initializes the tests by adding 4 documents with identical text to the index.
+     */
+    protected void setUp() throws Exception {
+
+        super.setUp();
+
+        // create test index
+        mDirectory = new RAMDirectory();
+        final IndexWriter writer = new IndexWriter(mDirectory, new StandardAnalyzer(), true);
+        addDocument(writer, "1", "I think it should work.");
+        addDocument(writer, "2", "I think it should work.");
+        addDocument(writer, "3", "I think it should work.");
+        addDocument(writer, "4", "I think it should work.");
+        writer.close();
+    }
+
+    protected void tearDown() throws Exception {
+
+        mDirectory.close();
+        mDirectory = null;
+    }
+
+    /**
+     * Adds the document to the index.
+     *
+     * @param writer the Lucene index writer
+     * @param id the unique id of the document
+     * @param text the text of the document
+     * @throws IOException
+     */
+    protected void addDocument(final IndexWriter writer, final String id, final String text) throws IOException {
+
+        final Document document = new Document();
+        document.add(new Field(FIELD_ID, id, Field.Store.YES, Field.Index.UN_TOKENIZED));
+        document.add(new Field(FIELD_TEXT, text, Field.Store.YES, Field.Index.TOKENIZED));
+        writer.addDocument(document);
+    }
+
+    /**
+     * Tests a BooleanQuery that requires the same SpanTermQuery twice.
+     *
+     * Before the fix for bug #35157, Lucene returned an incorrect number of
+     * results for this query and scored them incorrectly.
+     *
+     * @throws IOException
+     */
+    public void testBooleanQueryWithSpanQueries() throws IOException {
+
+        doTestBooleanQueryWithSpanQueries(0.3884282f);
+    }
+
+    /**
+     * Runs a BooleanQuery that requires the same SpanTermQuery twice and checks
+     * that the documents with IDs 1 to 4 are returned with the expected score.
+     *
+     * Before the fix for bug #35157 this returned an incorrect number of hits.
+     *
+     * @throws IOException
+     */
+    protected void doTestBooleanQueryWithSpanQueries(final float expectedScore) throws IOException {
+
+        final Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "work"));
+        final BooleanQuery query = new BooleanQuery();
+        query.add(spanQuery, BooleanClause.Occur.MUST);
+        query.add(spanQuery, BooleanClause.Occur.MUST);
+        final Hits hits = executeQuery(query);
+        final String[] expectedIds = new String[] { "1", "2", "3", "4" };
+        final float[] expectedScores = new float[] { expectedScore, expectedScore, expectedScore, expectedScore };
+        assertHits(hits, "two span queries", expectedIds, expectedScores);
+    }
+
+    /**
+     * Executes the query against the test index and returns the hits. The
+     * searcher is closed before the hits are returned.
+     *
+     * @param query the query to execute
+     * @throws IOException
+     */
+    protected Hits executeQuery(final Query query) throws IOException {
+
+        final IndexSearcher searcher = new IndexSearcher(mDirectory);
+        final Hits hits = searcher.search(query);
+        searcher.close();
+        return hits;
+    }
+
+    /**
+     * Checks to see if the hits are what we expected.
+     *
+     * @param hits the search results
+     * @param description the description of the search
+     * @param expectedIds the expected document ids of the hits
+     * @param expectedScores the expected scores of the hits
+     *
+     * @throws IOException
+     */
+    protected void assertHits(final Hits hits, final String description, final String[] expectedIds,
+            final float[] expectedScores) throws IOException {
+
+        // display the hits
+        System.out.println(hits.length() + " hits for search: \"" + description + '\"');
+        for (int i = 0; i < hits.length(); i++) {
+            System.out.println("  " + FIELD_ID + ':' + hits.doc(i).get(FIELD_ID) + " (score:" + hits.score(i) + ')');
+        }
+
+        // did we get the hits we expected
+        assertEquals(expectedIds.length, hits.length());
+        for (int i = 0; i < hits.length(); i++) {
+            assertTrue(expectedIds[i].equals(hits.doc(i).get(FIELD_ID)));
+            assertEquals(expectedScores[i], hits.score(i), 0);
+        }
+    }
+}
\ No newline at end of file

Added: lucene/java/trunk/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java?rev=179679&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java Thu Jun  2 17:22:47 2005
@@ -0,0 +1,109 @@
+package org.apache.lucene.search.spans;
+
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.Query;
+
+/*******************************************************************************
+ * Some expanded tests to make sure my patch doesn't break other SpanTermQuery
+ * functionality.
+ *
+ * @author Reece Wilton
+ */
+public class TestSpansAdvanced2 extends TestSpansAdvanced {
+
+    /**
+     * Initializes the tests by adding documents to the index.
+     */
+    protected void setUp() throws Exception {
+        super.setUp();
+
+        // create test index
+        final IndexWriter writer = new IndexWriter(mDirectory, new StandardAnalyzer(), false);
+        addDocument(writer, "A", "Should we, could we, would we?");
+        addDocument(writer, "B", "It should.  Should it?");
+        addDocument(writer, "C", "It shouldn't.");
+        addDocument(writer, "D", "Should we, should we, should we.");
+        writer.close();
+    }
+
+    /**
+     * Verifies that the index has the correct number of documents.
+     *
+     * @throws Exception
+     */
+    public void testVerifyIndex() throws Exception {
+        final IndexReader reader = IndexReader.open(mDirectory);
+        assertEquals(8, reader.numDocs());
+        reader.close();
+    }
+
+    /**
+     * Tests a single span query that matches multiple documents.
+     *
+     * @throws IOException
+     */
+    public void testSingleSpanQuery() throws IOException {
+
+        final Query spanQuery = new SpanTermQuery(new Term(FIELD_TEXT, "should"));
+        final Hits hits = executeQuery(spanQuery);
+        final String[] expectedIds = new String[] { "B", "D", "1", "2", "3", "4", "A" };
+        final float[] expectedScores = new float[] { 0.625f, 0.45927936f, 0.35355338f, 0.35355338f, 0.35355338f,
+                0.35355338f, 0.26516503f, };
+        assertHits(hits, "single span query", expectedIds, expectedScores);
+    }
+
+    /**
+     * Tests a BooleanQuery that requires two different span term queries.
+     *
+     * @throws IOException
+     */
+    public void testMultipleDifferentSpanQueries() throws IOException {
+
+        final Query spanQuery1 = new SpanTermQuery(new Term(FIELD_TEXT, "should"));
+        final Query spanQuery2 = new SpanTermQuery(new Term(FIELD_TEXT, "we"));
+        final BooleanQuery query = new BooleanQuery();
+        query.add(spanQuery1, BooleanClause.Occur.MUST);
+        query.add(spanQuery2, BooleanClause.Occur.MUST);
+        final Hits hits = executeQuery(query);
+        final String[] expectedIds = new String[] { "A", "D" };
+        final float[] expectedScores = new float[] { 0.93163157f, 0.20698164f };
+        assertHits(hits, "multiple different span queries", expectedIds, expectedScores);
+    }
+
+    /**
+     * Overrides the base-class test so that the same two-span-query check runs
+     * against this class's 8-document index with a different expected score.
+     *
+     * Before the fix for bug #35157 this returned an incorrect number of hits.
+     *
+     * @throws IOException
+     */
+    public void testBooleanQueryWithSpanQueries() throws IOException {
+
+        doTestBooleanQueryWithSpanQueries(0.73500174f);
+    }
+}