You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by bu...@apache.org on 2004/09/15 13:58:22 UTC
DO NOT REPLY [Bug 31241] New: - Sorting produces duplicities

DO NOT REPLY TO THIS EMAIL, BUT PLEASE POST YOUR BUG 
RELATED COMMENTS THROUGH THE WEB INTERFACE AVAILABLE AT
<http://issues.apache.org/bugzilla/show_bug.cgi?id=31241>.
ANY REPLY MADE TO THIS MESSAGE WILL NOT BE COLLECTED AND 
INSERTED IN THE BUG DATABASE.

http://issues.apache.org/bugzilla/show_bug.cgi?id=31241

Sorting produces duplicities

           Summary: Sorting produces duplicities
           Product: Lucene
           Version: 1.4
          Platform: All
        OS/Version: All
            Status: NEW
          Severity: Normal
          Priority: Other
         Component: Search
        AssignedTo: lucene-dev@jakarta.apache.org
        ReportedBy: kuhn@fg.cz


If you run the code below the exception will be thrown. I believe that it isn't 
correct behaviour (the duplicities, of course), index id of hits should be 
unique as it is without sort.

Lucene versions:
1.4-final
1.4.1
CVS 1.5-rc1-dev


import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

import java.io.IOException;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.ListIterator;
import java.util.Set;

/**
 * Run this test with Lucene 1.4 final or 1.4.1
 */
public class DuplicityTest
{
    public static void main(String[] args) throws IOException, ParseException
    {
        Directory directory = create_index();

        search_index(directory);
    }

    private static void search_index(Directory directory) throws IOException, 
ParseException
    {
        IndexReader reader = IndexReader.open(directory);
        Searcher searcher = new IndexSearcher(reader);

        Sort sort = new Sort(new SortField("co", SortField.INT, false));

        Query q = QueryParser.parse("sword", "text", new StandardAnalyzer());

        find_duplicity(searcher.search(q), "no sort");

        find_duplicity(searcher.search(q, sort), "using sort");

        searcher.close();
        reader.close();
    }

    private static void find_duplicity(Hits hits, String message) throws 
IOException
    {
        System.out.println(message + " hits size: " + hits.length());

        Set set = new HashSet();
        for (int i = 0; i < hits.length(); i++) {
//            System.out.println(hits.id(i) + ": " + hits.doc(i).toString());
            Integer id = new Integer(hits.id(i));
            if (!set.contains(id))
                set.add(id);
            else
                throw new RuntimeException("duplicity found, index id: " + id);
        }
        System.out.println("no duplicity found");
    }

    private static LinkedList words;

    static {
        words = new LinkedList();

        words.add("word");
        words.add("sword");
        words.add("dwarf");
        words.add("whale");
        words.add("male");
    }

    private static Directory create_index() throws IOException
    {
        Directory directory = new RAMDirectory();

        ListIterator e_words1 = words.listIterator();
        ListIterator e_words2 = words.listIterator(words.size());

        IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(), 
true);

        int co = 1;

        for (int i = 0; i < 300; i++) {

            if (!e_words1.hasNext()) {
                e_words1 = words.listIterator();
                e_words1.hasNext();
            }
            String word1 = (String)e_words1.next();
            if (!e_words2.hasPrevious()) {
                e_words2 = words.listIterator(words.size());
                e_words2.hasPrevious();
            }
            String word2 = (String)e_words2.previous();

            Document doc = new Document();

            doc.add(Field.Keyword("co", String.valueOf(co)));
            doc.add(Field.Text("text", word1 + " " + word2));
            writer.addDocument(doc);

            if (i % 20 == 0)
                co++;
        }
        writer.optimize();
        System.err.println("index size: " + writer.docCount());
        writer.close();

        return directory;
    }
}

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org