You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-user@lucene.apache.org by Rick Vestal <ri...@cat.utexas.edu> on 2002/06/17 15:33:59 UTC

Deleting documents from index question.

Good morning all,

I'm trying to delete a set of documents from an index,
and am running into a problem where all the documents are
not deleted.  My problem is either the way I am using the API
or it is a bug in lucene...I'm not sure which one it is.

I've included a sample program here that shows the problem.  Note
that you will have to change the path at the top to a valid set
of files on your machine.  If anybody has any ideas on why I
am not removing the files correctly, please let me know.

Thanks,

-- Rick

/*
 * Created by IntelliJ IDEA.
 * User: rvestal
 * Date: Jun 16, 2002
 * Time: 10:23:51 PM
 * To change template for new class use
 * Code Style | Class Templates options (Tools | IDE Options).
 */
package org.intellij.plugins.docPlugin;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.*;

import java.io.*;
import java.util.Vector;

public class IndexTest {

    // path to ant 1.4.1 docs
    private static String mDirToIndex = "c:/utils/ant/docs/manual/api/";

    private static String INDEX_DIR = "indexTest";


    static private void collectFiles( File dir, Vector files ) {
        File[] children = dir.listFiles();
        for ( int ix = 0; ix < children.length; ix++ ) {
            File child = children[ix];
            if ( child.isDirectory() ) {
                collectFiles( child, files );
            } else {
                files.add( child );
            }
        }
    }


    public static void main( String[] args ) {
        File indexDir = new File( INDEX_DIR );
        if ( !indexDir.exists() ) {
            indexDir.mkdirs();
        }

        Vector files = new Vector();
        collectFiles( new File( mDirToIndex ), files );

        try {
            IndexWriter writer = new IndexWriter( INDEX_DIR, new 
StandardAnalyzer(), true );

            for ( int ix = 0; ix < files.size(); ix++ ) {
                File file = ( File ) files.get( ix );
                writer.addDocument( IndexTestDocument.createDocument(
file ) );
            }
            System.out.println( "Added: " + files.size() + " files." );

            writer.optimize();
            writer.close();
            writer = null;

            Searcher searcher = new IndexSearcher( INDEX_DIR );
            Analyzer analyzer = new StandardAnalyzer();
            Query query = QueryParser.parse( "Ant", "contents", analyzer
);

            Hits hits = searcher.search( query );
            System.out.println( "Hits after add: " + hits.length() );
            searcher.close();

            Directory directory = FSDirectory.getDirectory( INDEX_DIR,
false );
            IndexReader reader = IndexReader.open( directory );

            int count = 0;
            for ( int ix = 0; ix < files.size(); ix++ ) {
                String path = IndexTestDocument.normalizePath( ( ( File
) 
files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );

                int numDocs = reader.numDocs();
                boolean bDeleted = false;
                for ( int ndx = 0; ndx < numDocs; ndx++ ) {
                    if ( !reader.isDeleted( ndx ) ) {
                        String docPath = IndexTestDocument.getPath( 
reader.document( ndx ) );
                        if ( docPath.equals( path ) ) {
                            count++;
                            reader.delete( ndx );
                            bDeleted = true;
                            break;
                        }
                    }
                }
                if ( !bDeleted ) {
                    System.out.println( "  Not Deleted: " + path );
                    for( int ndx = 0; ndx < numDocs; ndx++ ) {
                        if ( !reader.isDeleted( ndx ) ) {
                            String docPath = IndexTestDocument.getPath( 
reader.document( ndx ) );
                            System.out.println( "      path " + ndx + ":
" + 
docPath );
                        }
                    }
                }
            }
            System.out.println( "Removed " + count + " documents of (" +

files.size() + ")" );
            reader.close();

            searcher = new IndexSearcher( INDEX_DIR );
            analyzer = new StandardAnalyzer();
            query = QueryParser.parse( "Ant", "contents", analyzer );

            hits = searcher.search( query );
            System.out.println( "Hits after remove: " + hits.length() );

        } catch ( Exception ex ) {
            ex.printStackTrace();
        }
    }


    static class IndexTestDocument {

        static public Document createDocument( File f )
            throws FileNotFoundException {
            Document doc = new Document();
            doc.add( Field.Text( "path", normalizePath( f.getPath() ) )
);
            Reader reader = new BufferedReader( new InputStreamReader(
new 
FileInputStream( f ) ) );
            doc.add( Field.Text( "contents", reader ) );
            return doc;
        }


        static public String getPath( Document doc ) {
            return ( String ) doc.get( "path" );
        }

        static public String normalizePath( String path ) {
            if ( path == null || path.length() == 0 ) {
                return "";
            }
            path = path.replace( '\\', '/' );
            File f = new File( path );
            if ( f.isDirectory() ) {
                if ( path.charAt( path.length() - 1 ) != '/' ) {
                    path = path + "/";
                }
            }
            return path;
        }
    }
}



-- 
Center for Agile Technology          phone: 512.232.4399
The University of Texas at Austin    fax: 512.232.6413
3925 West Braker Lane                email: rick@cat.utexas.edu
MCC Suite 3.11040 CAT                http://cat.utexas.edu/
Austin, TX   78759-5316


--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>


RE: Deleting documents from index question.

Posted by "Nader S. Henein" <ns...@bayt.net>.
I run my delete function without optimizing, because optimizing takes too long
and because a delete doesn't inflate the number of files in the index the way
an insert does
(it just adds one file, I imagine for exclusion purposes until the next
optimize),
and it works fine.

here's how I delete :

    /**
     * Deletes from the index every document whose "id" field contains the
     * term <code>fieldValue</code>.
     *
     * The index location is read from <code>indexPath</code> and failures are
     * appended to <code>errorText</code>; both are declared outside this
     * snippet (NOTE(review): presumably static fields of the enclosing
     * class - confirm).
     *
     * @param filePath   not used by this method; kept so existing callers
     *                   still compile
     * @param fieldValue value of the "id" field identifying the documents
     *                   to delete
     */
    public static synchronized void deleteIndexEntry ( String filePath , String fieldValue  ) {
        IndexReader reader = null;
        try {
            reader = IndexReader.open(indexPath);
            Term targetTerm  = new Term("id",fieldValue) ;
            reader.delete(targetTerm) ;
        } catch (java.io.IOException e) {
            errorText = errorText.concat("DeleteIndex :"+e+"\n") ;
        } finally {
            // Close in finally so the reader is released even when open()
            // succeeded but delete() threw.
            if (reader != null) {
                try {
                    reader.close();
                } catch (java.io.IOException e) {
                    errorText = errorText.concat("DeleteIndex :"+e+"\n") ;
                }
            }
        }
    }


-----Original Message-----
From: Karl Øie [mailto:karl@gan.no]
Sent: Monday, June 17, 2002 5:38 PM
To: Lucene Users List
Subject: Re: Deleting documents from index question.


hi, i think you must run writer.optimize after deleting docs before it takes
effect, deleted documents are only marked as deleted until then...


mvh karl øie

On Monday 17 June 2002 15:33, Rick Vestal wrote:
> Good morning all,
>
> I'm trying to delete a set of documents from an index,
> and am running into a problem where all the documents are
> not deleted.  My problem is either the way I am using the API
> or it is a bug in lucene...I'm not sure which one it is.
>
> I've included a sample program here that shows the problem.  Note
> that you will have to change the path at the top to a valid set
> of files on your machine.  If anybody has any ideas on why I
> am not removing the files correctly, please let me know.
>
> Thanks,
>
> -- Rick
>
> /*
>  * Created by IntelliJ IDEA.
>  * User: rvestal
>  * Date: Jun 16, 2002
>  * Time: 10:23:51 PM
>  * To change template for new class use
>  * Code Style | Class Templates options (Tools | IDE Options).
>  */
> package org.intellij.plugins.docPlugin;
>
> import org.apache.lucene.analysis.Analyzer;
> import org.apache.lucene.analysis.standard.StandardAnalyzer;
> import org.apache.lucene.document.*;
> import org.apache.lucene.index.*;
> import org.apache.lucene.queryParser.QueryParser;
> import org.apache.lucene.search.*;
> import org.apache.lucene.store.*;
>
> import java.io.*;
> import java.util.Vector;
>
> public class IndexTest {
>
>     // path to ant 1.4.1 docs
>     private static String mDirToIndex = "c:/utils/ant/docs/manual/api/";
>
>     private static String INDEX_DIR = "indexTest";
>
>
>     static private void collectFiles( File dir, Vector files ) {
>         File[] children = dir.listFiles();
>         for ( int ix = 0; ix < children.length; ix++ ) {
>             File child = children[ix];
>             if ( child.isDirectory() ) {
>                 collectFiles( child, files );
>             } else {
>                 files.add( child );
>             }
>         }
>     }
>
>
>     public static void main( String[] args ) {
>         File indexDir = new File( INDEX_DIR );
>         if ( !indexDir.exists() ) {
>             indexDir.mkdirs();
>         }
>
>         Vector files = new Vector();
>         collectFiles( new File( mDirToIndex ), files );
>
>         try {
>             IndexWriter writer = new IndexWriter( INDEX_DIR, new
> StandardAnalyzer(), true );
>
>             for ( int ix = 0; ix < files.size(); ix++ ) {
>                 File file = ( File ) files.get( ix );
>                 writer.addDocument( IndexTestDocument.createDocument(
> file ) );
>             }
>             System.out.println( "Added: " + files.size() + " files." );
>
>             writer.optimize();
>             writer.close();
>             writer = null;
>
>             Searcher searcher = new IndexSearcher( INDEX_DIR );
>             Analyzer analyzer = new StandardAnalyzer();
>             Query query = QueryParser.parse( "Ant", "contents", analyzer
> );
>
>             Hits hits = searcher.search( query );
>             System.out.println( "Hits after add: " + hits.length() );
>             searcher.close();
>
>             Directory directory = FSDirectory.getDirectory( INDEX_DIR,
> false );
>             IndexReader reader = IndexReader.open( directory );
>
>             int count = 0;
>             for ( int ix = 0; ix < files.size(); ix++ ) {
>                 String path = IndexTestDocument.normalizePath( ( ( File
> )
> files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );
>
>                 int numDocs = reader.numDocs();
>                 boolean bDeleted = false;
>                 for ( int ndx = 0; ndx < numDocs; ndx++ ) {
>                     if ( !reader.isDeleted( ndx ) ) {
>                         String docPath = IndexTestDocument.getPath(
> reader.document( ndx ) );
>                         if ( docPath.equals( path ) ) {
>                             count++;
>                             reader.delete( ndx );
>                             bDeleted = true;
>                             break;
>                         }
>                     }
>                 }
>                 if ( !bDeleted ) {
>                     System.out.println( "  Not Deleted: " + path );
>                     for( int ndx = 0; ndx < numDocs; ndx++ ) {
>                         if ( !reader.isDeleted( ndx ) ) {
>                             String docPath = IndexTestDocument.getPath(
> reader.document( ndx ) );
>                             System.out.println( "      path " + ndx + ":
> " +
> docPath );
>                         }
>                     }
>                 }
>             }
>             System.out.println( "Removed " + count + " documents of (" +
>
> files.size() + ")" );
>             reader.close();
>
>             searcher = new IndexSearcher( INDEX_DIR );
>             analyzer = new StandardAnalyzer();
>             query = QueryParser.parse( "Ant", "contents", analyzer );
>
>             hits = searcher.search( query );
>             System.out.println( "Hits after remove: " + hits.length() );
>
>         } catch ( Exception ex ) {
>             ex.printStackTrace();
>         }
>     }
>
>
>     static class IndexTestDocument {
>
>         static public Document createDocument( File f )
>             throws FileNotFoundException {
>             Document doc = new Document();
>             doc.add( Field.Text( "path", normalizePath( f.getPath() ) )
> );
>             Reader reader = new BufferedReader( new InputStreamReader(
> new
> FileInputStream( f ) ) );
>             doc.add( Field.Text( "contents", reader ) );
>             return doc;
>         }
>
>
>         static public String getPath( Document doc ) {
>             return ( String ) doc.get( "path" );
>         }
>
>         static public String normalizePath( String path ) {
>             if ( path == null || path.length() == 0 ) {
>                 return "";
>             }
>             path = path.replace( '\\', '/' );
>             File f = new File( path );
>             if ( f.isDirectory() ) {
>                 if ( path.charAt( path.length() - 1 ) != '/' ) {
>                     path = path + "/";
>                 }
>             }
>             return path;
>         }
>     }
> }


--
To unsubscribe, e-mail:
<ma...@jakarta.apache.org>
For additional commands, e-mail:
<ma...@jakarta.apache.org>



--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>


RE: Deleting documents from index question.

Posted by Rick Vestal <ri...@cat.utexas.edu>.
I believe that did the trick!

Thanks for the info.

-- Rick

> -----Original Message-----
> From: Karl Øie [mailto:karl@gan.no] 
> Sent: Monday, June 17, 2002 8:38 AM
> To: Lucene Users List
> Subject: Re: Deleting documents from index question.
> 
> 
> hi, i think you must run writer.optimize after deleting docs 
> before it takes 
> effect, deleted documents are only marked as deleted until then...
> 
> 
> mvh karl øie
> 
> On Monday 17 June 2002 15:33, Rick Vestal wrote:
> > Good morning all,
> >
> > I'm trying to delete a set of documents from an index,
> > and am running into a problem where all the documents are
> > not deleted.  My problem is either the way I am using the 
> API or it is 
> > a bug in lucene...I'm not sure which one it is.
> >
> > I've included a sample program here that shows the problem. 
>  Note that 
> > you will have to change the path at the top to a valid set 
> of files on 
> > your machine.  If anybody has any ideas on why I am not 
> removing the 
> > files correctly, please let me know.
> >
> > Thanks,
> >
> > -- Rick
> >
> > /*
> >  * Created by IntelliJ IDEA.
> >  * User: rvestal
> >  * Date: Jun 16, 2002
> >  * Time: 10:23:51 PM
> >  * To change template for new class use
> >  * Code Style | Class Templates options (Tools | IDE Options).  */
> > package org.intellij.plugins.docPlugin;
> >
> > import org.apache.lucene.analysis.Analyzer;
> > import org.apache.lucene.analysis.standard.StandardAnalyzer;
> > import org.apache.lucene.document.*;
> > import org.apache.lucene.index.*;
> > import org.apache.lucene.queryParser.QueryParser;
> > import org.apache.lucene.search.*;
> > import org.apache.lucene.store.*;
> >
> > import java.io.*;
> > import java.util.Vector;
> >
> > public class IndexTest {
> >
> >     // path to ant 1.4.1 docs
> >     private static String mDirToIndex = 
> > "c:/utils/ant/docs/manual/api/";
> >
> >     private static String INDEX_DIR = "indexTest";
> >
> >
> >     static private void collectFiles( File dir, Vector files ) {
> >         File[] children = dir.listFiles();
> >         for ( int ix = 0; ix < children.length; ix++ ) {
> >             File child = children[ix];
> >             if ( child.isDirectory() ) {
> >                 collectFiles( child, files );
> >             } else {
> >                 files.add( child );
> >             }
> >         }
> >     }
> >
> >
> >     public static void main( String[] args ) {
> >         File indexDir = new File( INDEX_DIR );
> >         if ( !indexDir.exists() ) {
> >             indexDir.mkdirs();
> >         }
> >
> >         Vector files = new Vector();
> >         collectFiles( new File( mDirToIndex ), files );
> >
> >         try {
> >             IndexWriter writer = new IndexWriter( INDEX_DIR, new 
> > StandardAnalyzer(), true );
> >
> >             for ( int ix = 0; ix < files.size(); ix++ ) {
> >                 File file = ( File ) files.get( ix );
> >                 writer.addDocument( 
> IndexTestDocument.createDocument( 
> > file ) );
> >             }
> >             System.out.println( "Added: " + files.size() + 
> " files." 
> > );
> >
> >             writer.optimize();
> >             writer.close();
> >             writer = null;
> >
> >             Searcher searcher = new IndexSearcher( INDEX_DIR );
> >             Analyzer analyzer = new StandardAnalyzer();
> >             Query query = QueryParser.parse( "Ant", "contents", 
> > analyzer );
> >
> >             Hits hits = searcher.search( query );
> >             System.out.println( "Hits after add: " + 
> hits.length() );
> >             searcher.close();
> >
> >             Directory directory = FSDirectory.getDirectory( 
> INDEX_DIR, 
> > false );
> >             IndexReader reader = IndexReader.open( directory );
> >
> >             int count = 0;
> >             for ( int ix = 0; ix < files.size(); ix++ ) {
> >                 String path = IndexTestDocument.normalizePath( ( ( 
> > File
> > )
> > files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );
> >
> >                 int numDocs = reader.numDocs();
> >                 boolean bDeleted = false;
> >                 for ( int ndx = 0; ndx < numDocs; ndx++ ) {
> >                     if ( !reader.isDeleted( ndx ) ) {
> >                         String docPath = IndexTestDocument.getPath( 
> > reader.document( ndx ) );
> >                         if ( docPath.equals( path ) ) {
> >                             count++;
> >                             reader.delete( ndx );
> >                             bDeleted = true;
> >                             break;
> >                         }
> >                     }
> >                 }
> >                 if ( !bDeleted ) {
> >                     System.out.println( "  Not Deleted: " + path );
> >                     for( int ndx = 0; ndx < numDocs; ndx++ ) {
> >                         if ( !reader.isDeleted( ndx ) ) {
> >                             String docPath = 
> > IndexTestDocument.getPath( reader.document( ndx ) );
> >                             System.out.println( "      path 
> " + ndx + ":
> > " +
> > docPath );
> >                         }
> >                     }
> >                 }
> >             }
> >             System.out.println( "Removed " + count + " 
> documents of (" 
> > +
> >
> > files.size() + ")" );
> >             reader.close();
> >
> >             searcher = new IndexSearcher( INDEX_DIR );
> >             analyzer = new StandardAnalyzer();
> >             query = QueryParser.parse( "Ant", "contents", 
> analyzer );
> >
> >             hits = searcher.search( query );
> >             System.out.println( "Hits after remove: " + 
> hits.length() 
> > );
> >
> >         } catch ( Exception ex ) {
> >             ex.printStackTrace();
> >         }
> >     }
> >
> >
> >     static class IndexTestDocument {
> >
> >         static public Document createDocument( File f )
> >             throws FileNotFoundException {
> >             Document doc = new Document();
> >             doc.add( Field.Text( "path", normalizePath( 
> f.getPath() ) 
> > ) );
> >             Reader reader = new BufferedReader( new 
> InputStreamReader( 
> > new FileInputStream( f ) ) );
> >             doc.add( Field.Text( "contents", reader ) );
> >             return doc;
> >         }
> >
> >
> >         static public String getPath( Document doc ) {
> >             return ( String ) doc.get( "path" );
> >         }
> >
> >         static public String normalizePath( String path ) {
> >             if ( path == null || path.length() == 0 ) {
> >                 return "";
> >             }
> >             path = path.replace( '\\', '/' );
> >             File f = new File( path );
> >             if ( f.isDirectory() ) {
> >                 if ( path.charAt( path.length() - 1 ) != '/' ) {
> >                     path = path + "/";
> >                 }
> >             }
> >             return path;
> >         }
> >     }
> > }
> 
> 
> --
> To unsubscribe, e-mail:   
> <mailto:lucene-user-> unsubscribe@jakarta.apache.org>
> For 
> additional commands, 
> e-mail: <ma...@jakarta.apache.org>
> 


--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>


Re: Deleting documents from index question.

Posted by Karl Øie <ka...@gan.no>.
hi, i think you must run writer.optimize after deleting docs before it takes 
effect, deleted documents are only marked as deleted until then...


mvh karl øie

On Monday 17 June 2002 15:33, Rick Vestal wrote:
> Good morning all,
>
> I'm trying to delete a set of documents from an index,
> and am running into a problem where all the documents are
> not deleted.  My problem is either the way I am using the API
> or it is a bug in lucene...I'm not sure which one it is.
>
> I've included a sample program here that shows the problem.  Note
> that you will have to change the path at the top to a valid set
> of files on your machine.  If anybody has any ideas on why I
> am not removing the files correctly, please let me know.
>
> Thanks,
>
> -- Rick
>
> /*
>  * Created by IntelliJ IDEA.
>  * User: rvestal
>  * Date: Jun 16, 2002
>  * Time: 10:23:51 PM
>  * To change template for new class use
>  * Code Style | Class Templates options (Tools | IDE Options).
>  */
> package org.intellij.plugins.docPlugin;
>
> import org.apache.lucene.analysis.Analyzer;
> import org.apache.lucene.analysis.standard.StandardAnalyzer;
> import org.apache.lucene.document.*;
> import org.apache.lucene.index.*;
> import org.apache.lucene.queryParser.QueryParser;
> import org.apache.lucene.search.*;
> import org.apache.lucene.store.*;
>
> import java.io.*;
> import java.util.Vector;
>
> public class IndexTest {
>
>     // path to ant 1.4.1 docs
>     private static String mDirToIndex = "c:/utils/ant/docs/manual/api/";
>
>     private static String INDEX_DIR = "indexTest";
>
>
>     static private void collectFiles( File dir, Vector files ) {
>         File[] children = dir.listFiles();
>         for ( int ix = 0; ix < children.length; ix++ ) {
>             File child = children[ix];
>             if ( child.isDirectory() ) {
>                 collectFiles( child, files );
>             } else {
>                 files.add( child );
>             }
>         }
>     }
>
>
>     public static void main( String[] args ) {
>         File indexDir = new File( INDEX_DIR );
>         if ( !indexDir.exists() ) {
>             indexDir.mkdirs();
>         }
>
>         Vector files = new Vector();
>         collectFiles( new File( mDirToIndex ), files );
>
>         try {
>             IndexWriter writer = new IndexWriter( INDEX_DIR, new
> StandardAnalyzer(), true );
>
>             for ( int ix = 0; ix < files.size(); ix++ ) {
>                 File file = ( File ) files.get( ix );
>                 writer.addDocument( IndexTestDocument.createDocument(
> file ) );
>             }
>             System.out.println( "Added: " + files.size() + " files." );
>
>             writer.optimize();
>             writer.close();
>             writer = null;
>
>             Searcher searcher = new IndexSearcher( INDEX_DIR );
>             Analyzer analyzer = new StandardAnalyzer();
>             Query query = QueryParser.parse( "Ant", "contents", analyzer
> );
>
>             Hits hits = searcher.search( query );
>             System.out.println( "Hits after add: " + hits.length() );
>             searcher.close();
>
>             Directory directory = FSDirectory.getDirectory( INDEX_DIR,
> false );
>             IndexReader reader = IndexReader.open( directory );
>
>             int count = 0;
>             for ( int ix = 0; ix < files.size(); ix++ ) {
>                 String path = IndexTestDocument.normalizePath( ( ( File
> )
> files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );
>
>                 int numDocs = reader.numDocs();
>                 boolean bDeleted = false;
>                 for ( int ndx = 0; ndx < numDocs; ndx++ ) {
>                     if ( !reader.isDeleted( ndx ) ) {
>                         String docPath = IndexTestDocument.getPath(
> reader.document( ndx ) );
>                         if ( docPath.equals( path ) ) {
>                             count++;
>                             reader.delete( ndx );
>                             bDeleted = true;
>                             break;
>                         }
>                     }
>                 }
>                 if ( !bDeleted ) {
>                     System.out.println( "  Not Deleted: " + path );
>                     for( int ndx = 0; ndx < numDocs; ndx++ ) {
>                         if ( !reader.isDeleted( ndx ) ) {
>                             String docPath = IndexTestDocument.getPath(
> reader.document( ndx ) );
>                             System.out.println( "      path " + ndx + ":
> " +
> docPath );
>                         }
>                     }
>                 }
>             }
>             System.out.println( "Removed " + count + " documents of (" +
>
> files.size() + ")" );
>             reader.close();
>
>             searcher = new IndexSearcher( INDEX_DIR );
>             analyzer = new StandardAnalyzer();
>             query = QueryParser.parse( "Ant", "contents", analyzer );
>
>             hits = searcher.search( query );
>             System.out.println( "Hits after remove: " + hits.length() );
>
>         } catch ( Exception ex ) {
>             ex.printStackTrace();
>         }
>     }
>
>
>     static class IndexTestDocument {
>
>         static public Document createDocument( File f )
>             throws FileNotFoundException {
>             Document doc = new Document();
>             doc.add( Field.Text( "path", normalizePath( f.getPath() ) )
> );
>             Reader reader = new BufferedReader( new InputStreamReader(
> new
> FileInputStream( f ) ) );
>             doc.add( Field.Text( "contents", reader ) );
>             return doc;
>         }
>
>
>         static public String getPath( Document doc ) {
>             return ( String ) doc.get( "path" );
>         }
>
>         static public String normalizePath( String path ) {
>             if ( path == null || path.length() == 0 ) {
>                 return "";
>             }
>             path = path.replace( '\\', '/' );
>             File f = new File( path );
>             if ( f.isDirectory() ) {
>                 if ( path.charAt( path.length() - 1 ) != '/' ) {
>                     path = path + "/";
>                 }
>             }
>             return path;
>         }
>     }
> }


--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>


RE: Deleting documents from index question.

Posted by Rick Vestal <ri...@cat.utexas.edu>.
Undesired results.

> -----Original Message-----
> From: Nader S. Henein [mailto:nsh@bayt.net] 
> Sent: Monday, June 17, 2002 9:16 AM
> To: Lucene Users List
> Subject: RE: Deleting documents from index question.
> 
> 
> define incorrect .. a crash or a un-desired results
> 
> -----Original Message-----
> From: Rick Vestal [mailto:rick@cat.utexas.edu]
> Sent: Monday, June 17, 2002 6:10 PM
> To: 'Lucene Users List'; nsh@bayt.net
> Subject: RE: Deleting documents from index question.
> 
> 
> I had tried this and it had the same incorrect result.
> 
> Thanks,
> 
> -- Rick
> 
> > -----Original Message-----
> > From: Nader S. Henein [mailto:nsh@bayt.net]
> > Sent: Monday, June 17, 2002 9:09 AM
> > To: Lucene Users List
> > Subject: RE: Deleting documents from index question.
> >
> >
> > PS: try closing the reader after you're done deleating and 
> open a new 
> > one for the search, kind a like commiting a transaction to 
> a normal DB
> >
> > -----Original Message-----
> > From: Rick Vestal [mailto:rick@cat.utexas.edu]
> > Sent: Monday, June 17, 2002 5:34 PM
> > To: lucene-user@jakarta.apache.org
> > Subject: Deleting documents from index question.
> >
> >
> > Good morning all,
> >
> > I'm trying to delete a set of documents from an index,
> > and am running into a problem where all the documents are
> > not deleted.  My problem is either the way I am using the 
> API or it is 
> > a bug in lucene...I'm not sure which one it is.
> >
> > I've included a sample program here that shows the problem. 
> Note that 
> > you will have to change the path at the top to a valid set 
> of files on 
> > your machine.  If anybody has any ideas on why I am not 
> removing the 
> > files correctly, please let me know.
> >
> > Thanks,
> >
> > -- Rick
> >
> > /*
> >  * Created by IntelliJ IDEA.
> >  * User: rvestal
> >  * Date: Jun 16, 2002
> >  * Time: 10:23:51 PM
> >  * To change template for new class use
> >  * Code Style | Class Templates options (Tools | IDE Options).  */ 
> > package org.intellij.plugins.docPlugin;
> >
> > import org.apache.lucene.analysis.Analyzer;
> > import org.apache.lucene.analysis.standard.StandardAnalyzer;
> > import org.apache.lucene.document.*;
> > import org.apache.lucene.index.*;
> > import org.apache.lucene.queryParser.QueryParser;
> > import org.apache.lucene.search.*;
> > import org.apache.lucene.store.*;
> >
> > import java.io.*;
> > import java.util.Vector;
> >
> > public class IndexTest {
> >
> >     // path to ant 1.4.1 docs
> >     private static String mDirToIndex = 
> > "c:/utils/ant/docs/manual/api/";
> >
> >     private static String INDEX_DIR = "indexTest";
> >
> >
> >     static private void collectFiles( File dir, Vector files ) {
> >         File[] children = dir.listFiles();
> >         for ( int ix = 0; ix < children.length; ix++ ) {
> >             File child = children[ix];
> >             if ( child.isDirectory() ) {
> >                 collectFiles( child, files );
> >             } else {
> >                 files.add( child );
> >             }
> >         }
> >     }
> >
> >
> >     public static void main( String[] args ) {
> >         File indexDir = new File( INDEX_DIR );
> >         if ( !indexDir.exists() ) {
> >             indexDir.mkdirs();
> >         }
> >
> >         Vector files = new Vector();
> >         collectFiles( new File( mDirToIndex ), files );
> >
> >         try {
> >             IndexWriter writer = new IndexWriter( INDEX_DIR, new 
> > StandardAnalyzer(), true );
> >
> >             for ( int ix = 0; ix < files.size(); ix++ ) {
> >                 File file = ( File ) files.get( ix );
> >                 writer.addDocument( 
> IndexTestDocument.createDocument( 
> > file ) );
> >             }
> >             System.out.println( "Added: " + files.size() + 
> " files." 
> > );
> >
> >             writer.optimize();
> >             writer.close();
> >             writer = null;
> >
> >             Searcher searcher = new IndexSearcher( INDEX_DIR );
> >             Analyzer analyzer = new StandardAnalyzer();
> >             Query query = QueryParser.parse( "Ant", "contents", 
> > analyzer );
> >
> >             Hits hits = searcher.search( query );
> >             System.out.println( "Hits after add: " + 
> hits.length() );
> >             searcher.close();
> >
> >             Directory directory = FSDirectory.getDirectory( 
> INDEX_DIR, 
> > false );
> >             IndexReader reader = IndexReader.open( directory );
> >
> >             int count = 0;
> >             for ( int ix = 0; ix < files.size(); ix++ ) {
> >                 String path = IndexTestDocument.normalizePath( ( ( 
> > File
> > )
> > files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );
> >
> >                 int numDocs = reader.numDocs();
> >                 boolean bDeleted = false;
> >                 for ( int ndx = 0; ndx < numDocs; ndx++ ) {
> >                     if ( !reader.isDeleted( ndx ) ) {
> >                         String docPath = IndexTestDocument.getPath( 
> > reader.document( ndx ) );
> >                         if ( docPath.equals( path ) ) {
> >                             count++;
> >                             reader.delete( ndx );
> >                             bDeleted = true;
> >                             break;
> >                         }
> >                     }
> >                 }
> >                 if ( !bDeleted ) {
> >                     System.out.println( "  Not Deleted: " + path );
> >                     for( int ndx = 0; ndx < numDocs; ndx++ ) {
> >                         if ( !reader.isDeleted( ndx ) ) {
> >                             String docPath = 
> > IndexTestDocument.getPath( reader.document( ndx ) );
> >                             System.out.println( "      path "
> > + ndx + ":
> > " +
> > docPath );
> >                         }
> >                     }
> >                 }
> >             }
> >             System.out.println( "Removed " + count + " 
> documents of (" 
> > +
> >
> > files.size() + ")" );
> >             reader.close();
> >
> >             searcher = new IndexSearcher( INDEX_DIR );
> >             analyzer = new StandardAnalyzer();
> >             query = QueryParser.parse( "Ant", "contents", 
> analyzer );
> >
> >             hits = searcher.search( query );
> >             System.out.println( "Hits after remove: " +
> > hits.length() );
> >
> >         } catch ( Exception ex ) {
> >             ex.printStackTrace();
> >         }
> >     }
> >
> >
> >     static class IndexTestDocument {
> >
> >         static public Document createDocument( File f )
> >             throws FileNotFoundException {
> >             Document doc = new Document();
> >             doc.add( Field.Text( "path", normalizePath(
> > f.getPath() ) ) );
> >             Reader reader = new BufferedReader( new 
> InputStreamReader( 
> > new FileInputStream( f ) ) );
> >             doc.add( Field.Text( "contents", reader ) );
> >             return doc;
> >         }
> >
> >
> >         static public String getPath( Document doc ) {
> >             return ( String ) doc.get( "path" );
> >         }
> >
> >         static public String normalizePath( String path ) {
> >             if ( path == null || path.length() == 0 ) {
> >                 return "";
> >             }
> >             path = path.replace( '\\', '/' );
> >             File f = new File( path );
> >             if ( f.isDirectory() ) {
> >                 if ( path.charAt( path.length() - 1 ) != '/' ) {
> >                     path = path + "/";
> >                 }
> >             }
> >             return path;
> >         }
> >     }
> > }
> >
> >
> >
> > --
> > Center for Agile Technology          phone: 512.232.4399
> > The University of Texas at Austin    fax: 512.232.6413
> > 3925 West Braker Lane                email: rick@cat.utexas.edu
> > MCC Suite 3.11040 CAT                http://cat.utexas.edu/
> > Austin, TX   78759-5316
> >
> >
> > --
> > To unsubscribe, e-mail:
> > <mailto:lucene-user-> unsubscribe@jakarta.apache.org>
> > For
> > additional commands,
> > e-mail: <ma...@jakarta.apache.org>
> >
> >
> >
> > --
> > To unsubscribe, e-mail:
> > <mailto:lucene-user-> unsubscribe@jakarta.apache.org>
> > For
> > additional commands,
> > e-mail: <ma...@jakarta.apache.org>
> >
> 
> 
> --
> To unsubscribe, e-mail: 
> <mailto:lucene-user-> unsubscribe@jakarta.apache.org>
> For 
> additional commands, 
> e-mail: <ma...@jakarta.apache.org>
> 
> 
> 
> --
> To unsubscribe, e-mail:   
> <mailto:lucene-user-> unsubscribe@jakarta.apache.org>
> For 
> additional commands, 
> e-mail: <ma...@jakarta.apache.org>
> 


--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>


RE: Deleting documents from index question.

Posted by "Nader S. Henein" <ns...@bayt.net>.
Define "incorrect": a crash, or undesired results?

-----Original Message-----
From: Rick Vestal [mailto:rick@cat.utexas.edu]
Sent: Monday, June 17, 2002 6:10 PM
To: 'Lucene Users List'; nsh@bayt.net
Subject: RE: Deleting documents from index question.


I had tried this and it had the same incorrect result.

Thanks,

-- Rick

> -----Original Message-----
> From: Nader S. Henein [mailto:nsh@bayt.net]
> Sent: Monday, June 17, 2002 9:09 AM
> To: Lucene Users List
> Subject: RE: Deleting documents from index question.
>
>
> PS: try closing the reader after you're done deleating and
> open a new one for the search, kind a like commiting a
> transaction to a normal DB
>
> -----Original Message-----
> From: Rick Vestal [mailto:rick@cat.utexas.edu]
> Sent: Monday, June 17, 2002 5:34 PM
> To: lucene-user@jakarta.apache.org
> Subject: Deleting documents from index question.
>
>
> Good morning all,
>
> I'm trying to delete a set of documents from an index,
> and am running into a problem where all the documents are
> not deleted.  My problem is either the way I am using the API
> or it is a bug in lucene...I'm not sure which one it is.
>
> I've included a sample program here that shows the problem.
> Note that you will have to change the path at the top to a
> valid set of files on your machine.  If anybody has any ideas
> on why I am not removing the files correctly, please let me know.
>
> Thanks,
>
> -- Rick
>
> /*
>  * Created by IntelliJ IDEA.
>  * User: rvestal
>  * Date: Jun 16, 2002
>  * Time: 10:23:51 PM
>  * To change template for new class use
>  * Code Style | Class Templates options (Tools | IDE
> Options).  */ package org.intellij.plugins.docPlugin;
>
> import org.apache.lucene.analysis.Analyzer;
> import org.apache.lucene.analysis.standard.StandardAnalyzer;
> import org.apache.lucene.document.*;
> import org.apache.lucene.index.*;
> import org.apache.lucene.queryParser.QueryParser;
> import org.apache.lucene.search.*;
> import org.apache.lucene.store.*;
>
> import java.io.*;
> import java.util.Vector;
>
> public class IndexTest {
>
>     // path to ant 1.4.1 docs
>     private static String mDirToIndex =
> "c:/utils/ant/docs/manual/api/";
>
>     private static String INDEX_DIR = "indexTest";
>
>
>     static private void collectFiles( File dir, Vector files ) {
>         File[] children = dir.listFiles();
>         for ( int ix = 0; ix < children.length; ix++ ) {
>             File child = children[ix];
>             if ( child.isDirectory() ) {
>                 collectFiles( child, files );
>             } else {
>                 files.add( child );
>             }
>         }
>     }
>
>
>     public static void main( String[] args ) {
>         File indexDir = new File( INDEX_DIR );
>         if ( !indexDir.exists() ) {
>             indexDir.mkdirs();
>         }
>
>         Vector files = new Vector();
>         collectFiles( new File( mDirToIndex ), files );
>
>         try {
>             IndexWriter writer = new IndexWriter( INDEX_DIR,
> new StandardAnalyzer(), true );
>
>             for ( int ix = 0; ix < files.size(); ix++ ) {
>                 File file = ( File ) files.get( ix );
>                 writer.addDocument(
> IndexTestDocument.createDocument( file ) );
>             }
>             System.out.println( "Added: " + files.size() + "
> files." );
>
>             writer.optimize();
>             writer.close();
>             writer = null;
>
>             Searcher searcher = new IndexSearcher( INDEX_DIR );
>             Analyzer analyzer = new StandardAnalyzer();
>             Query query = QueryParser.parse( "Ant",
> "contents", analyzer );
>
>             Hits hits = searcher.search( query );
>             System.out.println( "Hits after add: " + hits.length() );
>             searcher.close();
>
>             Directory directory = FSDirectory.getDirectory(
> INDEX_DIR, false );
>             IndexReader reader = IndexReader.open( directory );
>
>             int count = 0;
>             for ( int ix = 0; ix < files.size(); ix++ ) {
>                 String path =
> IndexTestDocument.normalizePath( ( ( File
> )
> files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );
>
>                 int numDocs = reader.numDocs();
>                 boolean bDeleted = false;
>                 for ( int ndx = 0; ndx < numDocs; ndx++ ) {
>                     if ( !reader.isDeleted( ndx ) ) {
>                         String docPath =
> IndexTestDocument.getPath( reader.document( ndx ) );
>                         if ( docPath.equals( path ) ) {
>                             count++;
>                             reader.delete( ndx );
>                             bDeleted = true;
>                             break;
>                         }
>                     }
>                 }
>                 if ( !bDeleted ) {
>                     System.out.println( "  Not Deleted: " + path );
>                     for( int ndx = 0; ndx < numDocs; ndx++ ) {
>                         if ( !reader.isDeleted( ndx ) ) {
>                             String docPath =
> IndexTestDocument.getPath( reader.document( ndx ) );
>                             System.out.println( "      path "
> + ndx + ":
> " +
> docPath );
>                         }
>                     }
>                 }
>             }
>             System.out.println( "Removed " + count + "
> documents of (" +
>
> files.size() + ")" );
>             reader.close();
>
>             searcher = new IndexSearcher( INDEX_DIR );
>             analyzer = new StandardAnalyzer();
>             query = QueryParser.parse( "Ant", "contents", analyzer );
>
>             hits = searcher.search( query );
>             System.out.println( "Hits after remove: " +
> hits.length() );
>
>         } catch ( Exception ex ) {
>             ex.printStackTrace();
>         }
>     }
>
>
>     static class IndexTestDocument {
>
>         static public Document createDocument( File f )
>             throws FileNotFoundException {
>             Document doc = new Document();
>             doc.add( Field.Text( "path", normalizePath(
> f.getPath() ) ) );
>             Reader reader = new BufferedReader( new
> InputStreamReader( new FileInputStream( f ) ) );
>             doc.add( Field.Text( "contents", reader ) );
>             return doc;
>         }
>
>
>         static public String getPath( Document doc ) {
>             return ( String ) doc.get( "path" );
>         }
>
>         static public String normalizePath( String path ) {
>             if ( path == null || path.length() == 0 ) {
>                 return "";
>             }
>             path = path.replace( '\\', '/' );
>             File f = new File( path );
>             if ( f.isDirectory() ) {
>                 if ( path.charAt( path.length() - 1 ) != '/' ) {
>                     path = path + "/";
>                 }
>             }
>             return path;
>         }
>     }
> }
>
>
>
> --
> Center for Agile Technology          phone: 512.232.4399
> The University of Texas at Austin    fax: 512.232.6413
> 3925 West Braker Lane                email: rick@cat.utexas.edu
> MCC Suite 3.11040 CAT                http://cat.utexas.edu/
> Austin, TX   78759-5316
>
>
> --
> To unsubscribe, e-mail:
> <mailto:lucene-user-> unsubscribe@jakarta.apache.org>
> For
> additional commands,
> e-mail: <ma...@jakarta.apache.org>
>
>
>
> --
> To unsubscribe, e-mail:
> <mailto:lucene-user-> unsubscribe@jakarta.apache.org>
> For
> additional commands,
> e-mail: <ma...@jakarta.apache.org>
>


--
To unsubscribe, e-mail:
<ma...@jakarta.apache.org>
For additional commands, e-mail:
<ma...@jakarta.apache.org>



--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>


RE: Deleting documents from index question.

Posted by Rick Vestal <ri...@cat.utexas.edu>.
I had tried this and it had the same incorrect result.

Thanks,

-- Rick

> -----Original Message-----
> From: Nader S. Henein [mailto:nsh@bayt.net] 
> Sent: Monday, June 17, 2002 9:09 AM
> To: Lucene Users List
> Subject: RE: Deleting documents from index question.
> 
> 
> PS: try closing the reader after you're done deleating and 
> open a new one for the search, kind a like commiting a 
> transaction to a normal DB
> 
> -----Original Message-----
> From: Rick Vestal [mailto:rick@cat.utexas.edu]
> Sent: Monday, June 17, 2002 5:34 PM
> To: lucene-user@jakarta.apache.org
> Subject: Deleting documents from index question.
> 
> 
> Good morning all,
> 
> I'm trying to delete a set of documents from an index,
> and am running into a problem where all the documents are
> not deleted.  My problem is either the way I am using the API 
> or it is a bug in lucene...I'm not sure which one it is.
> 
> I've included a sample program here that shows the problem.  
> Note that you will have to change the path at the top to a 
> valid set of files on your machine.  If anybody has any ideas 
> on why I am not removing the files correctly, please let me know.
> 
> Thanks,
> 
> -- Rick
> 
> /*
>  * Created by IntelliJ IDEA.
>  * User: rvestal
>  * Date: Jun 16, 2002
>  * Time: 10:23:51 PM
>  * To change template for new class use
>  * Code Style | Class Templates options (Tools | IDE 
> Options).  */ package org.intellij.plugins.docPlugin;
> 
> import org.apache.lucene.analysis.Analyzer;
> import org.apache.lucene.analysis.standard.StandardAnalyzer;
> import org.apache.lucene.document.*;
> import org.apache.lucene.index.*;
> import org.apache.lucene.queryParser.QueryParser;
> import org.apache.lucene.search.*;
> import org.apache.lucene.store.*;
> 
> import java.io.*;
> import java.util.Vector;
> 
> public class IndexTest {
> 
>     // path to ant 1.4.1 docs
>     private static String mDirToIndex = 
> "c:/utils/ant/docs/manual/api/";
> 
>     private static String INDEX_DIR = "indexTest";
> 
> 
>     static private void collectFiles( File dir, Vector files ) {
>         File[] children = dir.listFiles();
>         for ( int ix = 0; ix < children.length; ix++ ) {
>             File child = children[ix];
>             if ( child.isDirectory() ) {
>                 collectFiles( child, files );
>             } else {
>                 files.add( child );
>             }
>         }
>     }
> 
> 
>     public static void main( String[] args ) {
>         File indexDir = new File( INDEX_DIR );
>         if ( !indexDir.exists() ) {
>             indexDir.mkdirs();
>         }
> 
>         Vector files = new Vector();
>         collectFiles( new File( mDirToIndex ), files );
> 
>         try {
>             IndexWriter writer = new IndexWriter( INDEX_DIR, 
> new StandardAnalyzer(), true );
> 
>             for ( int ix = 0; ix < files.size(); ix++ ) {
>                 File file = ( File ) files.get( ix );
>                 writer.addDocument( 
> IndexTestDocument.createDocument( file ) );
>             }
>             System.out.println( "Added: " + files.size() + " 
> files." );
> 
>             writer.optimize();
>             writer.close();
>             writer = null;
> 
>             Searcher searcher = new IndexSearcher( INDEX_DIR );
>             Analyzer analyzer = new StandardAnalyzer();
>             Query query = QueryParser.parse( "Ant", 
> "contents", analyzer );
> 
>             Hits hits = searcher.search( query );
>             System.out.println( "Hits after add: " + hits.length() );
>             searcher.close();
> 
>             Directory directory = FSDirectory.getDirectory( 
> INDEX_DIR, false );
>             IndexReader reader = IndexReader.open( directory );
> 
>             int count = 0;
>             for ( int ix = 0; ix < files.size(); ix++ ) {
>                 String path = 
> IndexTestDocument.normalizePath( ( ( File
> )
> files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );
> 
>                 int numDocs = reader.numDocs();
>                 boolean bDeleted = false;
>                 for ( int ndx = 0; ndx < numDocs; ndx++ ) {
>                     if ( !reader.isDeleted( ndx ) ) {
>                         String docPath = 
> IndexTestDocument.getPath( reader.document( ndx ) );
>                         if ( docPath.equals( path ) ) {
>                             count++;
>                             reader.delete( ndx );
>                             bDeleted = true;
>                             break;
>                         }
>                     }
>                 }
>                 if ( !bDeleted ) {
>                     System.out.println( "  Not Deleted: " + path );
>                     for( int ndx = 0; ndx < numDocs; ndx++ ) {
>                         if ( !reader.isDeleted( ndx ) ) {
>                             String docPath = 
> IndexTestDocument.getPath( reader.document( ndx ) );
>                             System.out.println( "      path " 
> + ndx + ":
> " +
> docPath );
>                         }
>                     }
>                 }
>             }
>             System.out.println( "Removed " + count + " 
> documents of (" +
> 
> files.size() + ")" );
>             reader.close();
> 
>             searcher = new IndexSearcher( INDEX_DIR );
>             analyzer = new StandardAnalyzer();
>             query = QueryParser.parse( "Ant", "contents", analyzer );
> 
>             hits = searcher.search( query );
>             System.out.println( "Hits after remove: " + 
> hits.length() );
> 
>         } catch ( Exception ex ) {
>             ex.printStackTrace();
>         }
>     }
> 
> 
>     static class IndexTestDocument {
> 
>         static public Document createDocument( File f )
>             throws FileNotFoundException {
>             Document doc = new Document();
>             doc.add( Field.Text( "path", normalizePath( 
> f.getPath() ) ) );
>             Reader reader = new BufferedReader( new 
> InputStreamReader( new FileInputStream( f ) ) );
>             doc.add( Field.Text( "contents", reader ) );
>             return doc;
>         }
> 
> 
>         static public String getPath( Document doc ) {
>             return ( String ) doc.get( "path" );
>         }
> 
>         static public String normalizePath( String path ) {
>             if ( path == null || path.length() == 0 ) {
>                 return "";
>             }
>             path = path.replace( '\\', '/' );
>             File f = new File( path );
>             if ( f.isDirectory() ) {
>                 if ( path.charAt( path.length() - 1 ) != '/' ) {
>                     path = path + "/";
>                 }
>             }
>             return path;
>         }
>     }
> }
> 
> 
> 
> --
> Center for Agile Technology          phone: 512.232.4399
> The University of Texas at Austin    fax: 512.232.6413
> 3925 West Braker Lane                email: rick@cat.utexas.edu
> MCC Suite 3.11040 CAT                http://cat.utexas.edu/
> Austin, TX   78759-5316
> 
> 
> --
> To unsubscribe, e-mail: 
> <mailto:lucene-user-> unsubscribe@jakarta.apache.org>
> For 
> additional commands, 
> e-mail: <ma...@jakarta.apache.org>
> 
> 
> 
> --
> To unsubscribe, e-mail:   
> <mailto:lucene-user-> unsubscribe@jakarta.apache.org>
> For 
> additional commands, 
> e-mail: <ma...@jakarta.apache.org>
> 


--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>


RE: Deleting documents from index question.

Posted by "Nader S. Henein" <ns...@bayt.net>.
PS: try closing the reader after you're done deleting and open a new one
for the search,
kind of like committing a transaction to a normal DB

-----Original Message-----
From: Rick Vestal [mailto:rick@cat.utexas.edu]
Sent: Monday, June 17, 2002 5:34 PM
To: lucene-user@jakarta.apache.org
Subject: Deleting documents from index question.


Good morning all,

I'm trying to delete a set of documents from an index,
and am running into a problem where not all of the documents
are deleted.  My problem is either the way I am using the API
or it is a bug in lucene...I'm not sure which one it is.

I've included a sample program here that shows the problem.  Note
that you will have to change the path at the top to a valid set
of files on your machine.  If anybody has any ideas on why I
am not removing the files correctly, please let me know.

Thanks,

-- Rick

/*
 * Created by IntelliJ IDEA.
 * User: rvestal
 * Date: Jun 16, 2002
 * Time: 10:23:51 PM
 * To change template for new class use
 * Code Style | Class Templates options (Tools | IDE Options).
 */
package org.intellij.plugins.docPlugin;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.*;

import java.io.*;
import java.util.Vector;

public class IndexTest {

    // path to ant 1.4.1 docs
    private static String mDirToIndex = "c:/utils/ant/docs/manual/api/";

    private static String INDEX_DIR = "indexTest";


    static private void collectFiles( File dir, Vector files ) {
        File[] children = dir.listFiles();
        for ( int ix = 0; ix < children.length; ix++ ) {
            File child = children[ix];
            if ( child.isDirectory() ) {
                collectFiles( child, files );
            } else {
                files.add( child );
            }
        }
    }


    public static void main( String[] args ) {
        File indexDir = new File( INDEX_DIR );
        if ( !indexDir.exists() ) {
            indexDir.mkdirs();
        }

        Vector files = new Vector();
        collectFiles( new File( mDirToIndex ), files );

        try {
            IndexWriter writer = new IndexWriter( INDEX_DIR, new
StandardAnalyzer(), true );

            for ( int ix = 0; ix < files.size(); ix++ ) {
                File file = ( File ) files.get( ix );
                writer.addDocument( IndexTestDocument.createDocument(
file ) );
            }
            System.out.println( "Added: " + files.size() + " files." );

            writer.optimize();
            writer.close();
            writer = null;

            Searcher searcher = new IndexSearcher( INDEX_DIR );
            Analyzer analyzer = new StandardAnalyzer();
            Query query = QueryParser.parse( "Ant", "contents", analyzer
);

            Hits hits = searcher.search( query );
            System.out.println( "Hits after add: " + hits.length() );
            searcher.close();

            Directory directory = FSDirectory.getDirectory( INDEX_DIR,
false );
            IndexReader reader = IndexReader.open( directory );

            int count = 0;
            for ( int ix = 0; ix < files.size(); ix++ ) {
                String path = IndexTestDocument.normalizePath( ( ( File
)
files.get( ix ) ).getAbsolutePath().replace( '\\', '/' ) );

                int numDocs = reader.numDocs();
                boolean bDeleted = false;
                for ( int ndx = 0; ndx < numDocs; ndx++ ) {
                    if ( !reader.isDeleted( ndx ) ) {
                        String docPath = IndexTestDocument.getPath(
reader.document( ndx ) );
                        if ( docPath.equals( path ) ) {
                            count++;
                            reader.delete( ndx );
                            bDeleted = true;
                            break;
                        }
                    }
                }
                if ( !bDeleted ) {
                    System.out.println( "  Not Deleted: " + path );
                    for( int ndx = 0; ndx < numDocs; ndx++ ) {
                        if ( !reader.isDeleted( ndx ) ) {
                            String docPath = IndexTestDocument.getPath(
reader.document( ndx ) );
                            System.out.println( "      path " + ndx + ":
" +
docPath );
                        }
                    }
                }
            }
            System.out.println( "Removed " + count + " documents of (" +

files.size() + ")" );
            reader.close();

            searcher = new IndexSearcher( INDEX_DIR );
            analyzer = new StandardAnalyzer();
            query = QueryParser.parse( "Ant", "contents", analyzer );

            hits = searcher.search( query );
            System.out.println( "Hits after remove: " + hits.length() );

        } catch ( Exception ex ) {
            ex.printStackTrace();
        }
    }


    static class IndexTestDocument {

        static public Document createDocument( File f )
            throws FileNotFoundException {
            Document doc = new Document();
            doc.add( Field.Text( "path", normalizePath( f.getPath() ) )
);
            Reader reader = new BufferedReader( new InputStreamReader(
new
FileInputStream( f ) ) );
            doc.add( Field.Text( "contents", reader ) );
            return doc;
        }


        static public String getPath( Document doc ) {
            return ( String ) doc.get( "path" );
        }

        static public String normalizePath( String path ) {
            if ( path == null || path.length() == 0 ) {
                return "";
            }
            path = path.replace( '\\', '/' );
            File f = new File( path );
            if ( f.isDirectory() ) {
                if ( path.charAt( path.length() - 1 ) != '/' ) {
                    path = path + "/";
                }
            }
            return path;
        }
    }
}



--
Center for Agile Technology          phone: 512.232.4399
The University of Texas at Austin    fax: 512.232.6413
3925 West Braker Lane                email: rick@cat.utexas.edu
MCC Suite 3.11040 CAT                http://cat.utexas.edu/
Austin, TX   78759-5316


--
To unsubscribe, e-mail:
<ma...@jakarta.apache.org>
For additional commands, e-mail:
<ma...@jakarta.apache.org>



--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>