You are viewing a plain text version of this content. The canonical link for it is here.
Posted to jetspeed-dev@portals.apache.org by pa...@apache.org on 2003/05/07 07:38:30 UTC

cvs commit: jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene TestLuceneSearch.java LuceneSearchService.java

paulsp      2003/05/06 22:38:30

  Modified:    src/java/org/apache/jetspeed/services/lucene
                        TestLuceneSearch.java LuceneSearchService.java
  Log:
  Added add() method.
  Note:
    Add() will allow duplicates.  To be fixed later.
    Title and description are not parsed out of the document.
  
  Revision  Changes    Path
  1.2       +42 -3     jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/TestLuceneSearch.java
  
  Index: TestLuceneSearch.java
  ===================================================================
  RCS file: /home/cvs/jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/TestLuceneSearch.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- TestLuceneSearch.java	7 May 2003 00:17:16 -0000	1.1
  +++ TestLuceneSearch.java	7 May 2003 05:38:29 -0000	1.2
  @@ -54,6 +54,7 @@
   
   package org.apache.jetspeed.services.lucene;
   
  +import java.net.URL;
   // Java imports
   import java.util.HashMap;
   
  @@ -140,6 +141,15 @@
           return new TestSuite( TestLuceneSearch.class );
       }
       
  +    public void testPutWebPage() throws Exception
  +    {
  +        URL jetspeedHomePage = new URL("http://jakarta.apache.org/jetspeed");
  +        assertNotNull("Created URL to Jetspeed Home Page",  jetspeedHomePage);
  +        assertTrue("Adding to index", LuceneSearch.add(jetspeedHomePage));
  +        assertTrue("Adding to index", LuceneSearch.add(new URL("http://www.google.com")));
  +        assertTrue("Adding to index", LuceneSearch.add(new URL("http://jakarta.apache.org")));
  +    }
  +
       /**
        *
        * @throws Exception
  @@ -147,8 +157,23 @@
       public void testVerifyJetspeedSearch() throws Exception
       {
           SearchResult result = null;
  -        SearchResults results  = LuceneSearch.search( "+jetspeed +overview");
  -        System.out.println("hits = " + results.size());
  +        SearchResults results  = LuceneSearch.search( "Jetspeed");
  +        System.out.println("Query 'Jetspeed' hits = " + results.size());
  +        for (int i = 0; i < results.size(); i++)
  +        {
  +            result = results.get(i);
  +            System.out.println("Score = " + result.getScore());
  +            System.out.println("title = " + result.getTitle());
  +            System.out.println("summary = " + result.getDescription());
  +            System.out.println("url = " + result.getDocumentURL());
  +        }
  +    }
  +
  +    public void testVerifyJetspeedSearch1() throws Exception
  +    {
  +        SearchResult result = null;
  +        SearchResults results  = LuceneSearch.search( "Jetspeed Lucene");
  +        System.out.println("Query 'Jetspeed Lucene' hits = " + results.size());
           for (int i = 0; i < results.size(); i++)
           {
               result = results.get(i);
  @@ -159,4 +184,18 @@
           }
       }
   
  +    public void testVerifyJetspeedSearch2() throws Exception
  +    {
  +        SearchResult result = null;
  +        SearchResults results  = LuceneSearch.search( "google");
  +        System.out.println("Query 'goggle' hits = " + results.size());
  +        for (int i = 0; i < results.size(); i++)
  +        {
  +            result = results.get(i);
  +            System.out.println("Score = " + result.getScore());
  +            System.out.println("title = " + result.getTitle());
  +            System.out.println("summary = " + result.getDescription());
  +            System.out.println("url = " + result.getDocumentURL());
  +        }
  +    }
   }
  
  
  
  1.2       +114 -17   jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/LuceneSearchService.java
  
  Index: LuceneSearchService.java
  ===================================================================
  RCS file: /home/cvs/jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/LuceneSearchService.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- LuceneSearchService.java	7 May 2003 00:17:16 -0000	1.1
  +++ LuceneSearchService.java	7 May 2003 05:38:29 -0000	1.2
  @@ -59,6 +59,10 @@
   import java.io.IOException;
   import java.net.URL;
   import javax.servlet.ServletConfig;
  +import org.apache.commons.httpclient.HttpClient;
  +import org.apache.commons.httpclient.HttpException;
  +import org.apache.commons.httpclient.HttpMethod;
  +import org.apache.commons.httpclient.methods.GetMethod;
   
   // Jetspeed imports
   
  @@ -75,6 +79,7 @@
   import org.apache.lucene.analysis.Analyzer;
   import org.apache.lucene.analysis.standard.StandardAnalyzer;
   import org.apache.lucene.document.Document;
  +import org.apache.lucene.document.Field;
   import org.apache.lucene.index.IndexWriter;
   import org.apache.lucene.queryParser.QueryParser;
   import org.apache.lucene.search.IndexSearcher;
  @@ -87,12 +92,19 @@
       
       public static final String SERVICE_NAME = "LuceneSearch";
       private static final String CONFIG_DIRECTORY = "directory";
  +    private static final String CONFIG_CONTENT_FIELDNAME = "fieldname.content";
  +    private static final String CONFIG_CONTENT_FIELDNAME_DEFAULT = "Content";
       private static final String CONFIG_DESCRIPTION_FIELDNAME = "fieldname.description";
  +    private static final String CONFIG_DESCRIPTION_FIELDNAME_DEFAULT = "Description";
       private static final String CONFIG_TITLE_FIELDNAME = "fieldname.title";
  +    private static final String CONFIG_TITLE_FIELDNAME_DEFAULT = "Title";
       private static final String CONFIG_URL_FIELDNAME = "fieldname.url";
  +    private static final String CONFIG_URL_FIELDNAME_DEFAULT = "URL";
  +    private String contentFieldName = null;
       private String descriptionFieldName = null;
  +    private File rootDir = null;
       private String indexRoot = null;
  -    private Searcher searcher = null;
  +    //    private Searcher searcher = null;
       private String titleFieldName = null;
       private String urlFieldName  = null;
       
  @@ -172,16 +184,17 @@
           .getResources(LuceneSearchService.SERVICE_NAME);
           
           // Get config properties
  -        descriptionFieldName  = serviceConf.getString( CONFIG_DESCRIPTION_FIELDNAME);
  -        titleFieldName  = serviceConf.getString( CONFIG_TITLE_FIELDNAME);
  -        urlFieldName  = serviceConf.getString( CONFIG_URL_FIELDNAME);
  +        contentFieldName  = serviceConf.getString( CONFIG_CONTENT_FIELDNAME, CONFIG_CONTENT_FIELDNAME_DEFAULT);
  +        descriptionFieldName  = serviceConf.getString(CONFIG_DESCRIPTION_FIELDNAME, CONFIG_DESCRIPTION_FIELDNAME_DEFAULT);
  +        titleFieldName  = serviceConf.getString(CONFIG_TITLE_FIELDNAME, CONFIG_TITLE_FIELDNAME_DEFAULT);
  +        urlFieldName  = serviceConf.getString(CONFIG_URL_FIELDNAME, CONFIG_URL_FIELDNAME_DEFAULT);
           indexRoot = serviceConf.getString( CONFIG_DIRECTORY);
           
           //
           // The following section opens or creates the search index
           //
           //
  -        File rootDir = new File(indexRoot);
  +        rootDir = new File(indexRoot);
           
           //If the rootDir does not exist, treat it as context relative
           if (!rootDir.exists())
  @@ -200,7 +213,9 @@
           
           try
           {
  +            Searcher searcher = null;
               searcher = new IndexSearcher( rootDir.getPath());
  +            searcher.close();
           }
           catch (Exception e)
           {
  @@ -211,7 +226,6 @@
                   indexWriter.close();
                   indexWriter = null;
                   Log.info("Created Lucene Index in " + rootDir.getPath());
  -                searcher = new IndexSearcher(rootDir.getPath());
               }
               catch (Exception e1)
               {
  @@ -230,21 +244,23 @@
        *
        * @param searchString is the what is being searched for
        * @return Hits, if no hits then null.
  +     *
  +     * @task Parse content into title and description fields
        */
       public SearchResults search(String searchString)
       {
  +        Searcher searcher = null;
           Hits hits = null;
           try
           {
  +            searcher = new IndexSearcher(rootDir.getPath());
               Analyzer analyzer = new StandardAnalyzer();
  -            
  -            Query query = QueryParser.parse(searchString, "contents", analyzer);
  -            System.out.println("Searching for: " + query.toString("contents"));
  -            
  +            Query query = QueryParser.parse(searchString, this.contentFieldName, analyzer);
               hits = searcher.search(query);
           }
           catch (Exception e)
           {
  +            e.printStackTrace();
               Log.error(e);
           }
           
  @@ -259,22 +275,33 @@
               {
                   doc = hits.doc(counter);
                   result.setScore(hits.score(counter));
  -                result.setDescription(doc.getField(this.descriptionFieldName).toString());
  -                result.setTitle(doc.getField(this.titleFieldName).toString());
  -                result.setDocumentURL(doc.getField(this.urlFieldName).toString());
  +                //result.setDescription(doc.getField(this.descriptionFieldName).toString());
  +                //result.setTitle(doc.getField(this.titleFieldName).toString());
  +                result.setDocumentURL(doc.getField(this.urlFieldName).stringValue());
                   results.add(counter, result);
               }
               catch (IOException ioe)
               {
                   Log.error(ioe);
  -                throw new Error("Error retrieving search results", ioe);
               }
               result = null;
           }
  +        
  +        if (searcher != null)
  +        {
  +            try
  +            {
  +                searcher.close();
  +            }
  +            catch (IOException ioe)
  +            {
  +                Log.error("Closing Searcher", ioe);
  +            }
  +        }
           return results;
       }
  -
  -    /** 
  +    
  +    /**
        * Add a page to be indexed.
        *
        * @return true is page added sucessfully added.
  @@ -282,7 +309,77 @@
        */
       public boolean add(URL pageToAdd)
       {
  -        return false;
  +        HttpClient client = new HttpClient();
  +        client.startSession(pageToAdd);
  +        GetMethod method = new GetMethod("/");
  +        method.setFollowRedirects(true);
  +        int statusCode = -1;
  +        int attempt = 0;
  +        
  +        // We will retry up to 3 times.
  +        while (statusCode == -1 && attempt < 3)
  +        {
  +            try
  +            {
  +                // execute the method.
  +                client.executeMethod(method);
  +                statusCode = method.getStatusCode();
  +                System.out.println("Status code = " + statusCode);
  +            }
  +            catch (HttpException e)
  +            {
  +                // We will retry
  +            }
  +            catch (IOException e)
  +            {
  +                return false;
  +            }
  +        }
  +        // Check that we didn't run out of retries.
  +        if (statusCode != -1)
  +        {
  +            String content = null;
  +            try
  +            {
  +                content = method.getDataAsString();
  +            }
  +            catch (IOException ioe)
  +            {
  +                Log.error("Getting content for " + pageToAdd.toString(), ioe);
  +            }
  +            
  +            if (content != null)
  +            {
  +                try
  +                {
  +                    Document doc = new Document();
  +                    doc.add(Field.Text(this.urlFieldName, pageToAdd.toString()));
  +                    doc.add(Field.Text(this.contentFieldName, content));
  +                    IndexWriter indexWriter = new IndexWriter( rootDir, new StandardAnalyzer(), false);
  +                    indexWriter.addDocument(doc);
  +                    System.out.println("Index Docuemnt Count = " + indexWriter.docCount());
  +                    indexWriter.optimize();
  +                    indexWriter.close();
  +                    Log.info("Added '" + pageToAdd.toString() + "' to index");
  +                }
  +                catch (Exception e)
  +                {
  +                    e.printStackTrace();
  +                    Log.error("Adding document to index", e);
  +                }
  +            }
  +        }
  +        try
  +        {
  +            client.endSession();
  +        }
  +        catch (IOException ioe)
  +        {
  +            ioe.printStackTrace();
  +            Log.error("Ending session to " + pageToAdd.toString(), ioe);
  +        }
  +        
  +        return (statusCode == 200);
       }
       
       public String[] getSearchSets()
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org


RE: cvs commit: jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene TestLuceneSearch.java LuceneSearchService.java

Posted by Mark Orciuch <ma...@ngsltd.com>.
Paul,
>
> 1) httpclient v1.0 was already in the lib directory
>

I will probably look really stupid when you tell me where it is - I can't
see it there. Is it under a different name?

> 2) Yes, lucene v1.3 Release Candidate 1, but I am not aware of  a reason
> not to use it.  I have requested the jar be added to the Maven repository
>

Cool.

Best regards,

Mark Orciuch - morciuch@apache.org
Jakarta Jetspeed - Enterprise Portal in Java
http://jakarta.apache.org/jetspeed/



---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org


Re: cvs commit: jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene TestLuceneSearch.java LuceneSearchService.java

Posted by Paul Spencer <pa...@mindspring.com>.
Mark,

1) httpclient v1.0 was already in the lib directory

2) Yes, lucene v1.3 Release Candidate 1, but I am not aware of  a reason 
not to use it.  I have requested the jar be added to the Maven repository

Paul Spencer

Mark Orciuch wrote:

>Paul,
>
>Looks like there's another runtime dependency: commons-httpclient. Also, I'm
>curious why you used lucene-1.3-rc1 and not the current stable release,
>1.2. Version 1.3-rc1 is not in the Maven repository.
>
>Best regards,
>
>Mark Orciuch - morciuch@apache.org
>Jakarta Jetspeed - Enterprise Portal in Java
>http://jakarta.apache.org/jetspeed/
>
>  
>
>>-----Original Message-----
>>From: Paul Spencer [mailto:paulspencer@mindspring.com]
>>Sent: Wednesday, May 07, 2003 4:48 PM
>>To: Jetspeed Developers List
>>Subject: Re: cvs commit:
>>jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene
>>TestLuceneSearch.java LuceneSearchService.java
>>
>>
>>Mark,
>>Thanks for catching the missing jar.
>>
>>Paul Spencer
>>
>>Mark Orciuch wrote:
>>
>>    
>>
>>>Paul,
>>>
>>>We need a lucene .jar checked in to get this to compile. Or have you been
>>>working with Maven too much lately ;-)
>>>
>>>Best regards,
>>>
>>>Mark Orciuch - morciuch@apache.org
>>>Jakarta Jetspeed - Enterprise Portal in Java
>>>http://jakarta.apache.org/jetspeed/
>>>
>>>
>>>
>>>      
>>>
>>
>>
>>---------------------------------------------------------------------
>>To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
>>For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org
>>
>>
>>
>>
>>    
>>
>
>
>
>---------------------------------------------------------------------
>To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
>For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org
>
>
>  
>




---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org


RE: cvs commit: jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene TestLuceneSearch.java LuceneSearchService.java

Posted by Mark Orciuch <ma...@ngsltd.com>.
Paul,

Looks like there's another runtime dependency: commons-httpclient. Also, I'm
curious why you used lucene-1.3-rc1 and not the current stable release,
1.2. Version 1.3-rc1 is not in the Maven repository.

Best regards,

Mark Orciuch - morciuch@apache.org
Jakarta Jetspeed - Enterprise Portal in Java
http://jakarta.apache.org/jetspeed/

> -----Original Message-----
> From: Paul Spencer [mailto:paulspencer@mindspring.com]
> Sent: Wednesday, May 07, 2003 4:48 PM
> To: Jetspeed Developers List
> Subject: Re: cvs commit:
> jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene
> TestLuceneSearch.java LuceneSearchService.java
>
>
> Mark,
> Thanks for catching the missing jar.
>
> Paul Spencer
>
> Mark Orciuch wrote:
>
> >Paul,
> >
> >We need a lucene .jar checked in to get this to compile. Or have you been
> >working with Maven too much lately ;-)
> >
> >Best regards,
> >
> >Mark Orciuch - morciuch@apache.org
> >Jakarta Jetspeed - Enterprise Portal in Java
> >http://jakarta.apache.org/jetspeed/
> >
> >
> >
>
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
> For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org
>
>
>
>



---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org


Re: cvs commit: jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene TestLuceneSearch.java LuceneSearchService.java

Posted by Paul Spencer <pa...@mindspring.com>.
Mark,
Thanks for catching the missing jar.

Paul Spencer

Mark Orciuch wrote:

>Paul,
>
>We need a lucene .jar checked in to get this to compile. Or have you been
>working with Maven too much lately ;-)
>
>Best regards,
>
>Mark Orciuch - morciuch@apache.org
>Jakarta Jetspeed - Enterprise Portal in Java
>http://jakarta.apache.org/jetspeed/
>
>  
>




---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org


RE: cvs commit: jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene TestLuceneSearch.java LuceneSearchService.java

Posted by Mark Orciuch <ma...@ngsltd.com>.
Paul,

We need a lucene .jar checked in to get this to compile. Or have you been
working with Maven too much lately ;-)

Best regards,

Mark Orciuch - morciuch@apache.org
Jakarta Jetspeed - Enterprise Portal in Java
http://jakarta.apache.org/jetspeed/

> -----Original Message-----
> From: paulsp@apache.org [mailto:paulsp@apache.org]
> Sent: Wednesday, May 07, 2003 12:39 AM
> To: jakarta-jetspeed-cvs@apache.org
> Subject: cvs commit:
> jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene
> TestLuceneSearch.java LuceneSearchService.java
>
>
> paulsp      2003/05/06 22:38:30
>
>   Modified:    src/java/org/apache/jetspeed/services/lucene
>                         TestLuceneSearch.java LuceneSearchService.java
>   Log:
>   Added add() method.
>   Note:
>     Add() will allow duplicates.  To be fixed later.
>     Title and description are not parsed out of the document.
>
>   Revision  Changes    Path
>   1.2       +42 -3
> jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/Test
> LuceneSearch.java
>
>   Index: TestLuceneSearch.java
>   ===================================================================
>   RCS file:
> /home/cvs/jakarta-jetspeed/src/java/org/apache/jetspeed/services/l
> ucene/TestLuceneSearch.java,v
>   retrieving revision 1.1
>   retrieving revision 1.2
>   diff -u -r1.1 -r1.2
>   --- TestLuceneSearch.java	7 May 2003 00:17:16 -0000	1.1
>   +++ TestLuceneSearch.java	7 May 2003 05:38:29 -0000	1.2
>   @@ -54,6 +54,7 @@
>
>    package org.apache.jetspeed.services.lucene;
>
>   +import java.net.URL;
>    // Java imports
>    import java.util.HashMap;
>
>   @@ -140,6 +141,15 @@
>            return new TestSuite( TestLuceneSearch.class );
>        }
>
>   +    public void testPutWebPage() throws Exception
>   +    {
>   +        URL jetspeedHomePage = new
> URL("http://jakarta.apache.org/jetspeed");
>   +        assertNotNull("Created URL to Jetspeed Home Page",
> jetspeedHomePage);
>   +        assertTrue("Adding to index",
> LuceneSearch.add(jetspeedHomePage));
>   +        assertTrue("Adding to index", LuceneSearch.add(new
> URL("http://www.google.com")));
>   +        assertTrue("Adding to index", LuceneSearch.add(new
> URL("http://jakarta.apache.org")));
>   +    }
>   +
>        /**
>         *
>         * @throws Exception
>   @@ -147,8 +157,23 @@
>        public void testVerifyJetspeedSearch() throws Exception
>        {
>            SearchResult result = null;
>   -        SearchResults results  = LuceneSearch.search(
> "+jetspeed +overview");
>   -        System.out.println("hits = " + results.size());
>   +        SearchResults results  = LuceneSearch.search( "Jetspeed");
>   +        System.out.println("Query 'Jetspeed' hits = " +
> results.size());
>   +        for (int i = 0; i < results.size(); i++)
>   +        {
>   +            result = results.get(i);
>   +            System.out.println("Score = " + result.getScore());
>   +            System.out.println("title = " + result.getTitle());
>   +            System.out.println("summary = " + result.getDescription());
>   +            System.out.println("url = " + result.getDocumentURL());
>   +        }
>   +    }
>   +
>   +    public void testVerifyJetspeedSearch1() throws Exception
>   +    {
>   +        SearchResult result = null;
>   +        SearchResults results  = LuceneSearch.search(
> "Jetspeed Lucene");
>   +        System.out.println("Query 'Jetspeed Lucene' hits = " +
> results.size());
>            for (int i = 0; i < results.size(); i++)
>            {
>                result = results.get(i);
>   @@ -159,4 +184,18 @@
>            }
>        }
>
>   +    public void testVerifyJetspeedSearch2() throws Exception
>   +    {
>   +        SearchResult result = null;
>   +        SearchResults results  = LuceneSearch.search( "google");
>   +        System.out.println("Query 'goggle' hits = " + results.size());
>   +        for (int i = 0; i < results.size(); i++)
>   +        {
>   +            result = results.get(i);
>   +            System.out.println("Score = " + result.getScore());
>   +            System.out.println("title = " + result.getTitle());
>   +            System.out.println("summary = " + result.getDescription());
>   +            System.out.println("url = " + result.getDocumentURL());
>   +        }
>   +    }
>    }
>
>
>
>   1.2       +114 -17
> jakarta-jetspeed/src/java/org/apache/jetspeed/services/lucene/Luce
> neSearchService.java
>
>   Index: LuceneSearchService.java
>   ===================================================================
>   RCS file:
> /home/cvs/jakarta-jetspeed/src/java/org/apache/jetspeed/services/l
> ucene/LuceneSearchService.java,v
>   retrieving revision 1.1
>   retrieving revision 1.2
>   diff -u -r1.1 -r1.2
>   --- LuceneSearchService.java	7 May 2003 00:17:16 -0000	1.1
>   +++ LuceneSearchService.java	7 May 2003 05:38:29 -0000	1.2
>   @@ -59,6 +59,10 @@
>    import java.io.IOException;
>    import java.net.URL;
>    import javax.servlet.ServletConfig;
>   +import org.apache.commons.httpclient.HttpClient;
>   +import org.apache.commons.httpclient.HttpException;
>   +import org.apache.commons.httpclient.HttpMethod;
>   +import org.apache.commons.httpclient.methods.GetMethod;
>
>    // Jetspeed imports
>
>   @@ -75,6 +79,7 @@
>    import org.apache.lucene.analysis.Analyzer;
>    import org.apache.lucene.analysis.standard.StandardAnalyzer;
>    import org.apache.lucene.document.Document;
>   +import org.apache.lucene.document.Field;
>    import org.apache.lucene.index.IndexWriter;
>    import org.apache.lucene.queryParser.QueryParser;
>    import org.apache.lucene.search.IndexSearcher;
>   @@ -87,12 +92,19 @@
>
>        public static final String SERVICE_NAME = "LuceneSearch";
>        private static final String CONFIG_DIRECTORY = "directory";
>   +    private static final String CONFIG_CONTENT_FIELDNAME =
> "fieldname.content";
>   +    private static final String
> CONFIG_CONTENT_FIELDNAME_DEFAULT = "Content";
>        private static final String CONFIG_DESCRIPTION_FIELDNAME =
> "fieldname.description";
>   +    private static final String
> CONFIG_DESCRIPTION_FIELDNAME_DEFAULT = "Description";
>        private static final String CONFIG_TITLE_FIELDNAME =
> "fieldname.title";
>   +    private static final String CONFIG_TITLE_FIELDNAME_DEFAULT
> = "Title";
>        private static final String CONFIG_URL_FIELDNAME = "fieldname.url";
>   +    private static final String CONFIG_URL_FIELDNAME_DEFAULT = "URL";
>   +    private String contentFieldName = null;
>        private String descriptionFieldName = null;
>   +    private File rootDir = null;
>        private String indexRoot = null;
>   -    private Searcher searcher = null;
>   +    //    private Searcher searcher = null;
>        private String titleFieldName = null;
>        private String urlFieldName  = null;
>
>   @@ -172,16 +184,17 @@
>            .getResources(LuceneSearchService.SERVICE_NAME);
>
>            // Get config properties
>   -        descriptionFieldName  = serviceConf.getString(
> CONFIG_DESCRIPTION_FIELDNAME);
>   -        titleFieldName  = serviceConf.getString(
> CONFIG_TITLE_FIELDNAME);
>   -        urlFieldName  = serviceConf.getString( CONFIG_URL_FIELDNAME);
>   +        contentFieldName  = serviceConf.getString(
> CONFIG_CONTENT_FIELDNAME, CONFIG_CONTENT_FIELDNAME_DEFAULT);
>   +        descriptionFieldName  =
> serviceConf.getString(CONFIG_DESCRIPTION_FIELDNAME,
> CONFIG_DESCRIPTION_FIELDNAME_DEFAULT);
>   +        titleFieldName  =
> serviceConf.getString(CONFIG_TITLE_FIELDNAME,
> CONFIG_TITLE_FIELDNAME_DEFAULT);
>   +        urlFieldName  =
> serviceConf.getString(CONFIG_URL_FIELDNAME, CONFIG_URL_FIELDNAME_DEFAULT);
>            indexRoot = serviceConf.getString( CONFIG_DIRECTORY);
>
>            //
>            // The following section opens or creates the search index
>            //
>            //
>   -        File rootDir = new File(indexRoot);
>   +        rootDir = new File(indexRoot);
>
>            //If the rootDir does not exist, treat it as context relative
>            if (!rootDir.exists())
>   @@ -200,7 +213,9 @@
>
>            try
>            {
>   +            Searcher searcher = null;
>                searcher = new IndexSearcher( rootDir.getPath());
>   +            searcher.close();
>            }
>            catch (Exception e)
>            {
>   @@ -211,7 +226,6 @@
>                    indexWriter.close();
>                    indexWriter = null;
>                    Log.info("Created Lucene Index in " +
> rootDir.getPath());
>   -                searcher = new IndexSearcher(rootDir.getPath());
>                }
>                catch (Exception e1)
>                {
>   @@ -230,21 +244,23 @@
>         *
>         * @param searchString is the what is being searched for
>         * @return Hits, if no hits then null.
>   +     *
>   +     * @task Parse content into title and description fields
>         */
>        public SearchResults search(String searchString)
>        {
>   +        Searcher searcher = null;
>            Hits hits = null;
>            try
>            {
>   +            searcher = new IndexSearcher(rootDir.getPath());
>                Analyzer analyzer = new StandardAnalyzer();
>   -
>   -            Query query = QueryParser.parse(searchString,
> "contents", analyzer);
>   -            System.out.println("Searching for: " +
> query.toString("contents"));
>   -
>   +            Query query = QueryParser.parse(searchString,
> this.contentFieldName, analyzer);
>                hits = searcher.search(query);
>            }
>            catch (Exception e)
>            {
>   +            e.printStackTrace();
>                Log.error(e);
>            }
>
>   @@ -259,22 +275,33 @@
>                {
>                    doc = hits.doc(counter);
>                    result.setScore(hits.score(counter));
>   -
> result.setDescription(doc.getField(this.descriptionFieldName).toString());
>   -
> result.setTitle(doc.getField(this.titleFieldName).toString());
>   -
> result.setDocumentURL(doc.getField(this.urlFieldName).toString());
>   +
> //result.setDescription(doc.getField(this.descriptionFieldName).to
> String());
>   +
> //result.setTitle(doc.getField(this.titleFieldName).toString());
>   +
> result.setDocumentURL(doc.getField(this.urlFieldName).stringValue());
>                    results.add(counter, result);
>                }
>                catch (IOException ioe)
>                {
>                    Log.error(ioe);
>   -                throw new Error("Error retrieving search
> results", ioe);
>                }
>                result = null;
>            }
>   +
>   +        if (searcher != null)
>   +        {
>   +            try
>   +            {
>   +                searcher.close();
>   +            }
>   +            catch (IOException ioe)
>   +            {
>   +                Log.error("Closing Searcher", ioe);
>   +            }
>   +        }
>            return results;
>        }
>   -
>   -    /**
>   +
>   +    /**
>         * Add a page to be indexed.
>         *
>         * @return true is page added sucessfully added.
>   @@ -282,7 +309,77 @@
>         */
>        public boolean add(URL pageToAdd)
>        {
>   -        return false;
>   +        HttpClient client = new HttpClient();
>   +        client.startSession(pageToAdd);
>   +        GetMethod method = new GetMethod("/");
>   +        method.setFollowRedirects(true);
>   +        int statusCode = -1;
>   +        int attempt = 0;
>   +
>   +        // We will retry up to 3 times.
>   +        while (statusCode == -1 && attempt < 3)
>   +        {
>   +            try
>   +            {
>   +                // execute the method.
>   +                client.executeMethod(method);
>   +                statusCode = method.getStatusCode();
>   +                System.out.println("Status code = " + statusCode);
>   +            }
>   +            catch (HttpException e)
>   +            {
>   +                // We will retry
>   +            }
>   +            catch (IOException e)
>   +            {
>   +                return false;
>   +            }
>   +        }
>   +        // Check that we didn't run out of retries.
>   +        if (statusCode != -1)
>   +        {
>   +            String content = null;
>   +            try
>   +            {
>   +                content = method.getDataAsString();
>   +            }
>   +            catch (IOException ioe)
>   +            {
>   +                Log.error("Getting content for " +
> pageToAdd.toString(), ioe);
>   +            }
>   +
>   +            if (content != null)
>   +            {
>   +                try
>   +                {
>   +                    Document doc = new Document();
>   +                    doc.add(Field.Text(this.urlFieldName,
> pageToAdd.toString()));
>   +                    doc.add(Field.Text(this.contentFieldName,
> content));
>   +                    IndexWriter indexWriter = new IndexWriter(
> rootDir, new StandardAnalyzer(), false);
>   +                    indexWriter.addDocument(doc);
>   +                    System.out.println("Index Docuemnt Count =
> " + indexWriter.docCount());
>   +                    indexWriter.optimize();
>   +                    indexWriter.close();
>   +                    Log.info("Added '" + pageToAdd.toString()
> + "' to index");
>   +                }
>   +                catch (Exception e)
>   +                {
>   +                    e.printStackTrace();
>   +                    Log.error("Adding document to index", e);
>   +                }
>   +            }
>   +        }
>   +        try
>   +        {
>   +            client.endSession();
>   +        }
>   +        catch (IOException ioe)
>   +        {
>   +            ioe.printStackTrace();
>   +            Log.error("Ending session to " +
> pageToAdd.toString(), ioe);
>   +        }
>   +
>   +        return (statusCode == 200);
>        }
>
>        public String[] getSearchSets()
>
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
> For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org
>
>
>
>



---------------------------------------------------------------------
To unsubscribe, e-mail: jetspeed-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: jetspeed-dev-help@jakarta.apache.org